# The code for 3D CNN for Action Recognition
# Please refer to the YouTube video for this lesson:
# 3D CNN-Action Recognition Part-1
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D
from keras.optimizers import SGD, RMSprop
from keras.utils import np_utils, generic_utils
import theano
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cv2
from sklearn.cross_validation import train_test_split
from sklearn import cross_validation
from sklearn import preprocessing
# image specification: every frame is resized to img_rows x img_cols and the
# first img_depth frames of each video form one training sample.
img_rows, img_cols, img_depth = 16, 16, 15

# Root folder of the dataset and its per-action sub-folders.  The position of
# an action in this tuple is the integer class label given to its clips.
DATASET_DIR = 'kth dataset'
ACTION_CLASSES = ('boxing', 'handclapping', 'handwaving',
                  'jogging', 'running', 'walking')


def _read_clip(video_path, n_frames, rows, cols):
    """Read the first ``n_frames`` frames of a video as a grayscale volume.

    Each frame is resized to ``rows`` x ``cols`` and converted from BGR to
    grayscale.

    Args:
        video_path: path of the video file to open.
        n_frames: number of leading frames to read.
        rows: height of every resized frame, in pixels.
        cols: width of every resized frame, in pixels.

    Returns:
        An array of shape ``(rows, cols, n_frames)``, or ``None`` when fewer
        than ``n_frames`` frames could be read (missing, corrupt or too short
        video).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        fps = cap.get(5)  # property 5 is the frame rate, as the message says
        print("Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps))
        for _ in range(n_frames):
            ret, frame = cap.read()
            if not ret:
                # No frame was returned; passing it to cv2.resize would crash.
                break
            # cv2.resize takes dsize as (width, height), i.e. (cols, rows).
            frame = cv2.resize(frame, (cols, rows), interpolation=cv2.INTER_AREA)
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    finally:
        # Always release the capture handle, even if decoding raised.
        cap.release()

    if len(frames) < n_frames:
        return None

    clip = np.array(frames)               # (n_frames, rows, cols)
    print(clip.shape)
    clip = np.transpose(clip, (1, 2, 0))  # (rows, cols, n_frames)
    print(clip.shape)
    return clip


# Training data
X_tr = []     # one (img_rows, img_cols, img_depth) volume per usable video
clip_labels = []  # class index of each entry of X_tr, recorded while reading

# No preview window is opened, so the former cv2.waitKey / destroyAllWindows
# calls had nothing to act on and have been dropped (they also fail on
# OpenCV builds without GUI support).
for class_index, action in enumerate(ACTION_CLASSES):
    class_dir = os.path.join(DATASET_DIR, action)
    # Sorted so that the sample order does not depend on the file system.
    for file_name in sorted(os.listdir(class_dir)):
        video_path = os.path.join(class_dir, file_name)
        clip = _read_clip(video_path, img_depth, img_rows, img_cols)
        if clip is None:
            print("Skipping {0}: could not read {1} frames".format(video_path, img_depth))
            continue
        X_tr.append(clip)
        clip_labels.append(class_index)

X_tr_array = np.array(X_tr)  # convert the frames read into array
num_samples = len(X_tr_array)
print(num_samples)
if num_samples == 0:
    raise RuntimeError("No usable videos found under '{0}'".format(DATASET_DIR))

# Assign a label to each sample.  Labels were recorded per clip above, so
# they stay aligned with X_tr however many videos each class folder holds.
label = np.array(clip_labels, dtype=int)

train_data = [X_tr_array, label]
(X_train, y_train) = (train_data[0], train_data[1])
print('X_Train shape:', X_train.shape)

# Insert a singleton axis after the sample axis:
# (num_samples, 1, img_rows, img_cols, img_depth).
train_set = np.zeros((num_samples, 1, img_rows, img_cols, img_depth))
train_set[:, 0] = X_train

patch_size = img_depth  # img_depth or number of frames used for each video
print(train_set.shape, 'train samples')
# CNN training parameters: mini-batch size, number of action classes and
# number of passes over the training data.
batch_size, nb_classes, nb_epoch = 2, 6, 50

# Per-layer architecture settings (one list entry per layer):
#   nb_filters - number of convolutional filters
#   nb_pool    - pooling window edge (POOL x POOL)
#   nb_conv    - convolution kernel edge (CONV x CONV)
nb_filters = [32] * 2
nb_pool = [3] * 2
nb_conv = [5] * 2

# Turn the integer class vector into a binary (one-hot) class matrix.
Y_train = np_utils.to_categorical(y_train, nb_classes)

# Pre-processing: cast to float32, subtract the global mean, then divide by
# the maximum of the mean-centred data.
train_set = train_set.astype('float32')
train_set -= train_set.mean()
train_set /= train_set.max()
# Define model: a single 3D convolution + max-pooling stage followed by a
# small fully connected classifier with a softmax output.
kernel_edge = nb_conv[0]   # same kernel size along all three conv axes
pool_edge = nb_pool[0]     # same pooling window along all three axes
# Matches the per-sample layout of train_set: (1, rows, cols, frames).
sample_shape = (1, img_rows, img_cols, patch_size)

model = Sequential()
model.add(Convolution3D(nb_filters[0],
                        nb_depth=kernel_edge,
                        nb_row=kernel_edge,
                        nb_col=kernel_edge,
                        input_shape=sample_shape,
                        activation='relu'))
model.add(MaxPooling3D(pool_size=(pool_edge, pool_edge, pool_edge)))

# Classifier head, added in order.
classifier_head = [
    Dropout(0.5),
    Flatten(),
    Dense(128, init='normal', activation='relu'),
    Dropout(0.5),
    Dense(nb_classes, init='normal'),
    Activation('softmax'),
]
for head_layer in classifier_head:
    model.add(head_layer)

model.compile(loss='categorical_crossentropy', optimizer='RMSprop')
# Split the data: 20% of the samples are held out for validation, with a
# fixed random_state so the split is repeatable.
X_train_new, X_val_new, y_train_new, y_val_new = train_test_split(
    train_set, Y_train, test_size=0.2, random_state=4)

# Train the model on the training part and track loss/accuracy on the
# held-out part after each epoch.
# (Alternative: fit on train_set / Y_train directly with validation_split=0.2.)
fit_options = dict(validation_data=(X_val_new, y_val_new),
                   batch_size=batch_size,
                   nb_epoch=nb_epoch,
                   show_accuracy=True,
                   shuffle=True)
hist = model.fit(X_train_new, y_train_new, **fit_options)

# Evaluate the model.  Note that the "Test" figures printed below are
# computed on the same held-out split that was used for validation.
score = model.evaluate(X_val_new, y_val_new,
                       batch_size=batch_size,
                       show_accuracy=True)
for position, caption in enumerate(('Test score:', 'Test accuracy:')):
    print(caption, score[position])
# Plot the results: per-epoch loss and accuracy for the training and
# validation data, as recorded in the history returned by model.fit.
train_loss = hist.history['loss']
val_loss = hist.history['val_loss']
train_acc = hist.history['acc']
val_acc = hist.history['val_acc']

# One x value per recorded epoch.  A hard-coded range(100) does not match the
# number of epochs actually trained and makes plt.plot raise on mismatched
# x/y lengths.
xc = list(range(len(train_loss)))

# Select the style before any figure is created so both figures use it.
print(plt.style.available)  # use bmh, classic, ggplot for big pictures
plt.style.use(['classic'])

plt.figure(1, figsize=(7, 5))
plt.plot(xc, train_loss)
plt.plot(xc, val_loss)
plt.xlabel('num of Epochs')
plt.ylabel('loss')
plt.title('train_loss vs val_loss')
plt.grid(True)
plt.legend(['train', 'val'])

plt.figure(2, figsize=(7, 5))
plt.plot(xc, train_acc)
plt.plot(xc, val_acc)
plt.xlabel('num of Epochs')
plt.ylabel('accuracy')
plt.title('train_acc vs val_acc')
plt.grid(True)
plt.legend(['train', 'val'], loc=4)

# Display the figures when the file is run as a plain script.
plt.show()
Hi man! Thanks a lot for your post. I have a problem that I have not been able to solve by googling for a long time: "TypeError: __init__() takes at least 5 arguments (5 given)" is raised when the model adds Convolution3D. Have you solved this problem? Thanks in advance!
model.add(Convolution3D(nb_filters[0], kernel_dim1=nb_conv[0], kernel_dim2=nb_conv[0], kernel_dim3=nb_conv[0],
input_shape=(1, img_rows, img_cols, img_depth), activation='relu'))
The Convolution3D parameters have changed a little.
Thanks a lot! I also find out it recently~
DeleteTypeError: __init__() missing 1 required positional argument: 'kernel_size'
Deletei am facing this error while loading the 3d model. Can some1 help me please
This comment has been removed by the author.
ReplyDeleteThis comment has been removed by the author.
Even though I ran code identical to what is written here, I am not getting a loss value and my accuracy does not change, like this:
2s - loss: nan - acc: 0.1729 - val_loss: nan - val_acc: 0.1417.
Also, testing took 0 seconds: 120/120 [==============================] - 0s
So, at the end, I have output like this:
('Test score:', nan)
('Test accuracy:', 0.14166666865348815).
What could be the reason? I couldn't solve the problem; any help would be appreciated.
Thanks for the tutorial and the code !
This comment has been removed by the author.
ReplyDeleteThis comment has been removed by the author.
ReplyDeleteThis comment has been removed by the author.
model.add(Dense(128, init='normal', activation='relu'))
ValueError: negative dimensions are not allowed
I am getting this error. Can you please explain why it happens and how to fix it?
Just add the following code, bro:
from keras import backend as K
K.set_image_dim_ordering('th')
It's working.
DeleteVery Nice work..
ReplyDeleteHi there,
ReplyDeleteAs per your suggestion I updated line model.add(Convolution3D...
Now I am getting following error
('X_Train shape:', (599, 16, 16, 15))
((599, 1, 16, 16, 15), 'train samples')
Traceback (most recent call last):
File "test3d.py", line 302, in
model.add(Convolution3D(nb_filters[0], kernel_dim1=nb_conv[0], kernel_dim2=nb_conv[0], kernel_dim3=nb_conv[0],input_shape=(1, img_rows, img_cols, img_depth), activation='relu'))
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/models.py", line 299, in add
layer.create_input_layer(batch_input_shape, input_dtype)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 401, in create_input_layer
self(x)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 572, in __call__
self.add_inbound_node(inbound_layers, node_indices, tensor_indices)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 635, in add_inbound_node
Node.create_node(self, inbound_layers, node_indices, tensor_indices)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 166, in create_node
output_tensors = to_list(outbound_layer.call(input_tensors[0], mask=input_masks[0]))
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/layers/convolutional.py", line 1234, in call
filter_shape=self.W_shape)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2831, in conv3d
x = tf.nn.conv3d(x, kernel, strides, padding)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 522, in conv3d
strides=strides, padding=padding, name=name)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2397, in create_op
set_shapes_for_outputs(ret)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1757, in set_shapes_for_outputs
shapes = shape_func(op)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1707, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 610, in call_cpp_shape_fn
debug_python_shape_fn, require_shape_fn)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 675, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Negative dimension size caused by subtracting 5 from 1 for 'Conv3D' (op: 'Conv3D') with input shapes: [?,1,16,16,15], [5,5,5,15,32].
Any idea what is causing this error.
Thanks,
Dejan
use theano backend
DeleteGreat tutorial!!!
ReplyDeleteWhat about color video. What changes are necessary for color video clips.
I'm sorry, could you explain why we have to roll the video dimensions?
ReplyDeleteThis comment has been removed by the author.
ReplyDeleteHi Anuj
Can you tell me how to move from the TensorFlow backend to the Theano backend? I have installed Theano and I am using Anaconda3 with Python 3.6, but when I run the first cell (the `from keras ...` imports) I get "Using TensorFlow backend" in the IPython console.
Hey do you have a pretrained model of this cnn?
ReplyDeleteAfter training how to predict in new video???
ReplyDeletehow much is the accuracy for this?
ReplyDeletehave u used GPU or trained on CPU?
I face error in this line: model.add(Convolution3D(nb_filters[0],nb_depth=nb_conv[0], nb_row=nb_conv[0], nb_col=nb_conv[0], input_shape=(1, img_rows, img_cols, patch_size), activation='relu'))
ReplyDeletethe error is TypeError: __init__() takes at least 3 arguments (3 given)
can you please help me
add
Deletefrom keras import backend as K
K.set_image_dim_ordering('th')
I can't run this line: model.compile(loss = 'categorical_crossentropy', optimizer = 'RMSprop')
ReplyDeleteIt shows that no conv3d function. Anyone know how to solve?
how a can test a video for this train script can you give me a script of prediction
ReplyDeletei face error TypeError: __init__() missing 1 required positional argument: 'kernel_size'
ReplyDeletecan you please help me ?
This comment has been removed by the author.
DeleteTry this:
Deletemodel.add(Convolution3D(nb_filters[0],kernel_dim1=nb_conv[0], kernel_dim2=nb_conv[0],kernel_dim3=nb_conv[0], input_shape=(1, img_rows, img_cols, patch_size), activation='relu'))
After this am getting new error...let try from your end
model.add(Dense(128, activation='relu', kernel_initializer='normal'))
DeleteValueError: ('Non-positive dimensions not allowed in size.', (-512, 128), -512)
I'm getting this error. Need help
if cv2.waitKey(1) & 0xFF == ord('q'):
ReplyDeletebreak
I am getting error at this line. function is not implemented. rebuild the libraries.
train_data = [X_tr_array,label]
ReplyDelete
(X_train, y_train) = (train_data[0],train_data[1])
#print('X_Train shape:', X_train.shape)
#print('y_Train shape:', y_train.shape)
train_set = np.zeros((num_samples, 1,img_rows,img_cols,img_depth))
print (X_train.shape)
print (train_set.shape)
for h in range(num_samples):
train_set[h][0][:][:][:]=X_train[h,:,:,:]
-----------------------------------------------------------------------
IndexError Traceback (most recent call last)
in
18
19 for h in range(num_samples):
---> 20 train_set[h][0][:][:][:]=X_train[h,:,:,:]
IndexError: too many indices for array
Can anyone help me with this error.
you have resolved this issue or not? i'm also getting this error. please tell if got the solution.
Deletemodel.add(Dense(128, activation='relu', kernel_initializer='normal'))
ReplyDeleteValueError: ('Non-positive dimensions not allowed in size.', (-512, 128), -512)
I'm getting this error. Need help
hi do u complete this project.
Deletehow to test it on real time video
ReplyDeleteThanks for the information. keep sharing.
ReplyDelete
ReplyDeletecan you help me to solve this issue..
DisabledFunctionError Traceback (most recent call last)
in ()
45 plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis
46 plt.show()
---> 47 cv2.imshow('frame',gray)
48 if cv2.waitKey(1) & 0xFF == ord('q'):
49 break
/usr/local/lib/python3.6/dist-packages/google/colab/_import_hooks/_cv2.py in wrapped(*args, **kwargs)
50 def wrapped(*args, **kwargs):
51 if not os.environ.get(env_var, False):
---> 52 raise DisabledFunctionError(message, name or func.__name__)
53 return func(*args, **kwargs)
54
DisabledFunctionError: cv2.imshow() is disabled in Colab, because it causes Jupyter sessions
to crash; see https://github.com/jupyter/notebook/issues/3935.
As a substitution, consider using
from google.colab.patches import cv2_imshow
can you help me to solve this error
Delete-----
TypeError Traceback (most recent call last)
/content/gdrive/My Drive/kth-dataset_action/kth_recognition.py in ()
290
291 model = Sequential()
--> 292 model.add(Convolution3D(nb_filters[0],nb_depth=nb_conv[0], nb_row=nb_conv[0], nb_col=nb_conv[0], input_shape=(1, img_rows, img_cols, patch_size), activation='relu'))
293 model.add(MaxPooling3D(pool_size=(nb_pool[0], nb_pool[0], nb_pool[0])))
294
TypeError: __init__() missing 1 required positional argument: 'kernel_size'
for this code
ReplyDeletefrom keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D
from keras.optimizers import SGD, RMSprop
from keras.utils import np_utils, generic_utils
import theano
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cv2
from sklearn.cross_validation import train_test_split
from sklearn import cross_validation
from sklearn import preprocessing
# image specification
img_rows,img_cols,img_depth=128,128,15
# Training data
X_tr=[] # variable to store entire dataset
#Reading boxing action class
listing = os.listdir('F:/UT INTERACTION/KTH/boxing')
for vid in listing:
vid = 'F:/UT INTERACTION/KTH/boxing/'+vid
frames = []
cap = cv2.VideoCapture(vid)
#fps = cap.get(5)
#print "Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps)
for k in xrange(15):
frame = cap.read()
frame = np.asarray(frame, dtype=np.uint8)
frame=cv2.resize(frame,(img_rows,img_cols),interpolation=cv2.INTER_AREA)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
frames.append(gray)
#plt.imshow(gray, cmap = plt.get_cmap('gray'))
#plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis
#plt.show()
#cv2.imshow('frame',gray)
cv2.imshow('frame',gray)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
input=np.array(frames)
print input.shape
ipt=np.rollaxis(np.rollaxis(input,2,0),2,0)
print ipt.shape
X_tr.append(ipt)
getting error
File "C:\Users\LENOVO\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "", line 1, in
runfile('F:/activity datasets/readVideo.py', wdir='F:/activity datasets')
File "C:\Users\LENOVO\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 704, in runfile
execfile(filename, namespace)
File "C:\Users\LENOVO\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "F:/activity datasets/readVideo.py", line 44
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
^
IndentationError: unexpected indent
could you please help me
model.add(Conv3D(nb_filters[0],kernel_dim1=nb_conv[0],kernel_dim2=nb_conv[0],kernel_dim3=nb_conv[0],input_shape=(1, img_rows, img_cols, img_depth), activation='relu'))
ReplyDeleteTypeError: __init__() missing 1 required positional argument: 'kernel_size'
hist = model.fit(train_set, Y_train, batch_size=batch_size, epochs=nb_epoch,validation_split=0.2,shuffle=True)
ReplyDeletegetting error when i run this code
error is
ValueError: Input 0 of layer sequential_5 is incompatible with the layer: expected axis -1 of input shape to have value 5 but received input with shape (None, 1, 16, 16, 15)
how can i get this original image sources?
ReplyDelete