Visit the wiki pages for additional documentation and instructions on how to view an interactive version of these notebooks using Binder.

In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt


from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report

import os
import csv
import random

%matplotlib inline

Step 1: Load the CSV files that contain the generated features

In [2]:
features = []
labels = []

# Each row holds: filename, class label, then the feature vector.
with open('./papsmear-features-normal.csv', newline='') as csvfile:
    stored_features = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in stored_features:
        filename = row[0]
        class_label = row[1]
        v = np.array(row[2:], dtype=np.float32)
        features.append([filename, class_label, v])
        labels.append(class_label)
In [3]:
# Append the dysplastic samples to the same lists.
with open('./papsmear-features-displastic.csv', newline='') as csvfile:
    stored_features = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in stored_features:
        filename = row[0]
        class_label = row[1]
        v = np.array(row[2:], dtype=np.float32)
        features.append([filename, class_label, v])
        labels.append(class_label)
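
The two loading cells above differ only in the file name; as a minimal sketch (assuming both files share the same filename/label/vector row layout), they could be folded into one hypothetical helper:

def load_features(path, features, labels):
    # Hypothetical helper: same row layout as above (filename, label, vector).
    with open(path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
            features.append([row[0], row[1], np.array(row[2:], dtype=np.float32)])
            labels.append(row[1])

features, labels = [], []
for path in ['./papsmear-features-normal.csv', './papsmear-features-displastic.csv']:
    load_features(path, features, labels)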

Step 2: Split the data into training and testing sets

In [4]:
# Dimensionality of one feature vector; shuffle samples and labels together.
feature_length = len(features[0][2])
features, labels = shuffle(features, labels, random_state=0)
In [5]:
# Use the first 200 shuffled samples for training, the rest for testing.
N_train = 200
features_train = features[:N_train]
features_test = features[N_train:]

labels_train = labels[:N_train]
labels_test = labels[N_train:]
In [6]:
# Pre-allocate dense matrices for the feature vectors.
data_train = np.zeros((len(features_train), feature_length))
data_test  = np.zeros((len(features_test), feature_length))
In [7]:
# Copy each sample's feature vector into its row.
for i in range(len(features_train)):
    data_train[i, :] = features_train[i][2]

for i in range(len(features_test)):
    data_test[i, :] = features_test[i][2]
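
The fixed 200-sample cut keeps the classes in whatever proportion the shuffle happens to produce. As an alternative sketch, scikit-learn's train_test_split can stratify the split so both sets preserve the class ratio (the 0.25 test fraction below is an arbitrary choice, not from the original):

from sklearn.model_selection import train_test_split

# Stack all feature vectors into one matrix, then split with stratification
# so the class ratio is the same in the training and testing sets.
data = np.stack([f[2] for f in features])
data_train, data_test, labels_train, labels_test = train_test_split(
    data, labels, test_size=0.25, stratify=labels, random_state=0)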

Step 3: Set up the classifier and train it

In [8]:
# RBF-kernel SVM; max_iter=500 caps the solver's iterations.
clf = SVC(random_state=0, max_iter=500)
In [9]:
clf.fit(data_train, np.asarray(labels_train))
Out[9]:
SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=500, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)
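
The RBF kernel is sensitive to feature scale, and with max_iter=500 the solver may stop before converging. A minimal sketch of a standardize-then-fit pipeline (the pipeline and the clf_scaled name are additions here, not part of the original notebook):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Standardize each feature to zero mean / unit variance before the RBF kernel;
# this often helps the solver converge within the iteration cap.
clf_scaled = make_pipeline(StandardScaler(), SVC(random_state=0, max_iter=500))
clf_scaled.fit(data_train, np.asarray(labels_train))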
In [10]:
prediction = clf.predict(data_test)
In [11]:
print(classification_report(labels_test, prediction))
              precision    recall  f1-score   support

           0       0.83      0.29      0.43        17
           1       0.82      0.98      0.89        54

    accuracy                           0.82        71
   macro avg       0.82      0.64      0.66        71
weighted avg       0.82      0.82      0.78        71
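The report shows recall of only 0.29 for class 0, so most of those test samples are misclassified. To get a feel for how separable the two classes are, the PCA import from the first cell (unused so far) can project the training features to 2-D, as a sketch; it assumes the class labels read from the CSVs are the strings '0' and '1':

# Project the training features onto the first two principal components.
pca = PCA(n_components=2)
proj = pca.fit_transform(data_train)

# Assumes the labels are the strings '0' and '1' as read from the CSV files.
colors = ['tab:blue' if lab == '0' else 'tab:red' for lab in labels_train]
plt.scatter(proj[:, 0], proj[:, 1], c=colors, s=15)
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.title('Training features, first two principal components')
plt.show()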
