import math
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
import os
import csv
import random
%matplotlib inline
def _load_feature_rows(csv_path):
    """Read one feature CSV and return its rows as parsed records.

    Every non-empty row is split into the value of column 0 (stored as the
    file name), the value of column 1 (stored as the class label) and the
    remaining columns, which are converted to a ``float32`` NumPy vector.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A list with one ``[filename, class_label, vector]`` entry per
        non-empty row, in file order.

    Raises:
        IndexError: If a non-empty row has fewer than two columns.
        ValueError: If a feature column cannot be parsed as a float.
    """
    records = []
    with open(csv_path, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in reader:
            if not row:
                # csv.reader yields an empty list for blank lines (e.g. a
                # trailing newline); indexing row[0] on it would crash.
                continue
            filename = row[0]
            class_label = row[1]
            vector = np.array(row[2:], dtype=np.float32)
            records.append([filename, class_label, vector])
    return records


# features: list of [filename, class_label, vector] records from both files.
# labels:   the class label of each record, in the same order as `features`.
features = []
labels = []
for _csv_path in ('./papsmear-features-normal.csv',
                  './papsmear-features-displastic.csv'):
    _records = _load_feature_rows(_csv_path)
    features.extend(_records)
    labels.extend(_record[1] for _record in _records)
# Length of one feature vector, taken from the first loaded record
# (each record is [filename, class_label, vector]).
feature_length = len(features[0][2])

# Shuffle records and labels together with a fixed seed so the split below
# is reproducible from run to run.
features, labels = shuffle(features, labels, random_state=0)

# The first N_train shuffled samples are used for training, the rest for
# testing.
N_train = 200
features_train, features_test = features[:N_train], features[N_train:]
labels_train, labels_test = labels[:N_train], labels[N_train:]

# Copy the per-record vectors into dense 2-D matrices, one row per sample.
data_train = np.zeros((len(features_train), feature_length))
data_test = np.zeros((len(features_test), feature_length))
for i, record in enumerate(features_train):
    data_train[i, :] = record[2]
for i, record in enumerate(features_test):
    data_test[i, :] = record[2]
# Fit a support vector classifier on the training matrix.
# NOTE(review): max_iter=500 is a hard cap on solver iterations; if
# scikit-learn emits a ConvergenceWarning here, the cap (or missing feature
# scaling) is the first thing to revisit -- confirm it is intentional.
clf = SVC(max_iter=500, random_state=0)
train_targets = np.asarray(labels_train)
clf.fit(data_train, train_targets)

# Predict the held-out samples and print the per-class metrics report.
prediction = clf.predict(data_test)
report = classification_report(labels_test, prediction)
print(report)