import math
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
import os
import csv
import random
%matplotlib inline
def _load_feature_rows(csv_path):
    """Read one feature CSV file and return its records.

    Every non-empty row is read as ``filename, class_label, value_1, ...``:
    the first two columns are kept as strings and all remaining columns are
    converted to a float32 NumPy vector.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A list of ``[filename, class_label, vector]`` lists, one per row.
    """
    records = []
    with open(csv_path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
            if not row:
                # csv.reader yields [] for blank lines; skip them instead of
                # failing with IndexError on row[0].
                continue
            filename, class_label = row[0], row[1]
            vector = np.array(row[2:], dtype=np.float32)
            records.append([filename, class_label, vector])
    return records


# Accumulate the records of both classes, in file order, together with a
# parallel list holding only the class labels.
features = []
labels = []
for csv_path in ('./papsmear-features-normal.csv',
                 './papsmear-features-displastic.csv'):
    for record in _load_feature_rows(csv_path):
        features.append(record)
        labels.append(record[1])
# Dimensionality of a feature vector, taken from the first loaded record.
feature_length = len(features[0][2])

# Shuffle records and labels in unison; the fixed seed keeps the split
# reproducible between runs.
features, labels = shuffle(features, labels, random_state=0)

# The first N_train shuffled samples form the training set, the rest the
# test set.
N_train = 200
features_train, features_test = features[:N_train], features[N_train:]
labels_train, labels_test = labels[:N_train], labels[N_train:]

# Copy the per-sample vectors (third field of each record) into dense
# 2-D matrices with one row per sample.
data_train = np.zeros((len(features_train), feature_length))
for row_index, record in enumerate(features_train):
    data_train[row_index, :] = record[2]

data_test = np.zeros((len(features_test), feature_length))
for row_index, record in enumerate(features_test):
    data_test[row_index, :] = record[2]
# Fit a two-component, full-covariance Gaussian mixture to the training
# matrix. random_state=None means the k-means initialisation is not seeded,
# so results can differ between runs.
clf = GaussianMixture(n_components=2, covariance_type='full',
                      tol=0.001, reg_covar=1e-06,
                      max_iter=100, n_init=1,
                      init_params='kmeans',
                      weights_init=None, means_init=None,
                      precisions_init=None, random_state=None,
                      warm_start=False, verbose=0,
                      verbose_interval=10)

# NOTE(review): GaussianMixture is unsupervised; per the scikit-learn docs the
# second argument of fit() is ignored and only present for API consistency.
clf.fit(data_train, np.asarray(labels_train))

# predict() returns the index (0 or 1) of the most likely mixture component.
prediction = clf.predict(data_test)

# Labels were read from CSV as strings; convert once to integers.
# `np.int` was removed in NumPy 1.24 -- the builtin `int` is the exact
# replacement (np.int was only an alias for it).
labels_test_int = np.array(labels_test, dtype=int)

print(prediction)
print(labels_test_int)

# NOTE(review): component indices are assigned arbitrarily by the fit and are
# not guaranteed to coincide with the class-label values, so this report may
# be computed on swapped labels -- confirm before trusting the numbers.
print(classification_report(labels_test_int, prediction))

# Learned mixture parameters: component means, covariances and weights.
print(clf.means_)
print(clf.covariances_)
print(clf.weights_)