diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bd19377 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/dataset +/output diff --git a/common_defs.py b/common_defs.py new file mode 100644 index 0000000..26bbb35 --- /dev/null +++ b/common_defs.py @@ -0,0 +1,41 @@ +import mahotas +import cv2 + +# fixed-sizes for image +fixed_size = tuple((500, 500)) + +# path to training data +train_path = "dataset/train" + +# bins for histogram +bins = 8 + + +# feature-descriptor-1: Hu Moments +def fd_hu_moments(image): + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + feature = cv2.HuMoments(cv2.moments(image)).flatten() + return feature + + +# feature-descriptor-2: Haralick Texture +def fd_haralick(image): + # convert the image to grayscale + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + # compute the haralick texture feature vector + haralick = mahotas.features.haralick(gray).mean(axis=0) + # return the result + return haralick + + +# feature-descriptor-3: Color Histogram +def fd_histogram(image, mask=None): + # convert the image to HSV color-space + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + # compute the color histogram + hist = cv2.calcHist( + [image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256]) + # normalize the histogram + cv2.normalize(hist, hist) + # return the histogram + return hist.flatten() diff --git a/global.py b/global.py index f603506..43aae24 100644 --- a/global.py +++ b/global.py @@ -6,54 +6,11 @@ from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import MinMaxScaler import numpy as np -import mahotas import cv2 import os import h5py -# fixed-sizes for image -fixed_size = tuple((500, 500)) - -# path to training data -train_path = "dataset/train" - -# no.of.trees for Random Forests -num_trees = 100 - -# bins for histogram -bins = 8 - -# train_test_split size -test_size = 0.10 - -# seed for reproducing same results -seed = 9 - -# feature-descriptor-1: Hu Moments -def fd_hu_moments(image): - image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - feature = cv2.HuMoments(cv2.moments(image)).flatten() - return feature - -# feature-descriptor-2: Haralick Texture -def fd_haralick(image): - # convert the image to grayscale - gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - # compute the haralick texture feature vector - haralick = mahotas.features.haralick(gray).mean(axis=0) - # return the result - return haralick - -# feature-descriptor-3: Color Histogram -def fd_histogram(image, mask=None): - # convert the image to HSV color-space - image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) - # compute the color histogram - hist = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256]) - # normalize the histogram - cv2.normalize(hist, hist) - # return the histogram - return hist.flatten() +from common_defs import fd_hu_moments, fd_haralick, fd_histogram, fixed_size, train_path # get the training labels train_labels = os.listdir(train_path) @@ -108,30 +65,30 @@ def fd_histogram(image, mask=None): i += 1 k += 1 - print "[STATUS] processed folder: {}".format(current_label) + print("[STATUS] processed folder: {}".format(current_label)) j += 1 -print "[STATUS] completed Global Feature Extraction..." +print("[STATUS] completed Global Feature Extraction...") # get the overall feature vector size -print "[STATUS] feature vector size {}".format(np.array(global_features).shape) +print("[STATUS] feature vector size {}".format(np.array(global_features).shape)) # get the overall training label size -print "[STATUS] training Labels {}".format(np.array(labels).shape) +print("[STATUS] training Labels {}".format(np.array(labels).shape)) # encode the target labels targetNames = np.unique(labels) le = LabelEncoder() target = le.fit_transform(labels) -print "[STATUS] training labels encoded..." +print("[STATUS] training labels encoded...") # normalize the feature vector in the range (0-1) scaler = MinMaxScaler(feature_range=(0, 1)) rescaled_features = scaler.fit_transform(global_features) -print "[STATUS] feature vector normalized..." +print("[STATUS] feature vector normalized...") -print "[STATUS] target labels: {}".format(target) -print "[STATUS] target labels shape: {}".format(target.shape) +print("[STATUS] target labels: {}".format(target)) +print("[STATUS] target labels shape: {}".format(target.shape)) # save the feature vector using HDF5 h5f_data = h5py.File('output/data.h5', 'w') @@ -143,4 +100,4 @@ def fd_histogram(image, mask=None): h5f_data.close() h5f_label.close() -print "[STATUS] end of training.." \ No newline at end of file +print("[STATUS] end of training..") diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..14cc100 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +sklearn +h5py +opencv-python +mahotas +scipy +matplotlib diff --git a/train_test.py b/train_test.py index 449ef0e..b2758b2 100644 --- a/train_test.py +++ b/train_test.py @@ -6,12 +6,10 @@ import h5py import numpy as np import os -import glob import cv2 from matplotlib import pyplot from sklearn.model_selection import train_test_split, cross_val_score -from sklearn.model_selection import KFold, StratifiedKFold -from sklearn.metrics import confusion_matrix, accuracy_score, classification_report +from sklearn.model_selection import KFold from sklearn.linear_model import LogisticRegression from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble import RandomForestClassifier @@ -19,7 +17,24 @@ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.naive_bayes import GaussianNB from sklearn.svm import SVC -from sklearn.externals import joblib + +from common_defs import fd_hu_moments, fd_haralick, fd_histogram, fixed_size, train_path + +# no.of.trees for Random Forests +num_trees = 100 + +# train_test_split size +test_size = 0.10 + +# seed for reproducing same results +seed = 9 + +# get the training labels +train_labels = os.listdir(train_path) + +# sort the training labels +train_labels.sort() +print(train_labels) # create all the machine learning models models = [] @@ -50,10 +65,10 @@ h5f_label.close() # verify the shape of the feature vector and labels -print "[STATUS] features shape: {}".format(global_features.shape) -print "[STATUS] labels shape: {}".format(global_labels.shape) +print("[STATUS] features shape: {}".format(global_features.shape)) +print("[STATUS] labels shape: {}".format(global_labels.shape)) -print "[STATUS] training started..." +print("[STATUS] training started...") # split the training and testing data (trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal) = train_test_split(np.array(global_features), @@ -61,11 +76,11 @@ test_size=test_size, random_state=seed) -print "[STATUS] splitted train and test data..." -print "Train data : {}".format(trainDataGlobal.shape) -print "Test data : {}".format(testDataGlobal.shape) -print "Train labels: {}".format(trainLabelsGlobal.shape) -print "Test labels : {}".format(testLabelsGlobal.shape) +print("[STATUS] splitted train and test data...") +print("Train data : {}".format(trainDataGlobal.shape)) +print("Test data : {}".format(testDataGlobal.shape)) +print("Train labels: {}".format(trainLabelsGlobal.shape)) +print("Test labels : {}".format(testLabelsGlobal.shape)) # filter all the warnings import warnings