Visit the wiki pages to find additional documentation and instructions on how to view an interactive version of these notebooks using Binder.

Process the PAP-SMEAR image data¶

We now apply the feature extraction to the entire dataset and save the results in a CSV file. This file is then used by other scripts in this directory.

import math
import numpy as np
import matplotlib.pyplot as plt

import skimage
from skimage import io, measure, exposure
from skimage.color import rgba2rgb, rgb2gray
from skimage.feature import greycomatrix, greycoprops


import os
import csv

%matplotlib inline

print(skimage.__version__)

0.16.2

Function to extract features from an image¶

Pap-smear data taken from PAP-SMEAR Databases Part II

def extract_features(image_input):
    """Build the feature vector for a single image.

    The image is converted to grayscale and described by two groups of
    features that are concatenated into one 1-D array:

    1. the normalized intensity histogram computed with ``nbins=16`` and
       ``source_range='dtype'``;
    2. six gray-level co-occurrence matrix (GLCM) properties, in this order:
       contrast, dissimilarity, homogeneity, ASM, energy, correlation.

    Parameters
    ----------
    image_input : array
        Image as accepted by ``rgb2gray``.

    Returns
    -------
    numpy.ndarray
        Histogram counts followed by the six GLCM properties.
    """
    gray = rgb2gray(image_input)

    # Intensity distribution; the bin centers are not part of the features.
    hist_counts, _bin_centers = exposure.histogram(
        gray, nbins=16, source_range='dtype', normalize=True)
    histogram_features = hist_counts.flatten()

    # The GLCM is computed on an 8-bit copy of the grayscale image.
    # NOTE(review): assumes `gray` is float in [0, 1] so that 255*gray fits
    # into uint8 -- confirm for inputs that are not plain RGB.
    gray_uint = (255 * gray).astype(np.uint8)
    glcm = greycomatrix(gray_uint, distances=[5], angles=[0], levels=256,
                        symmetric=True, normed=True)

    # A single distance and a single angle were requested, so each property
    # is read from position [0, 0] of the result.
    texture_props = ('contrast', 'dissimilarity', 'homogeneity',
                     'ASM', 'energy', 'correlation')
    texture_features = np.zeros(len(texture_props))
    for idx, prop in enumerate(texture_props):
        texture_features[idx] = greycoprops(glcm, prop)[0, 0]

    return np.concatenate([histogram_features, texture_features])

Main function for processing class: normal superficiel¶

# Folder holding the images of the "normal superficiel" class (label 0).
directory = "../images/papsmear-data/normal_superficiel/"

# One row per image: [filename, class label, feature values...].
features = []

# os.listdir() returns entries in arbitrary order; iterating in sorted order
# keeps the rows of the resulting CSV reproducible across runs and machines.
for filename in sorted(os.listdir(directory)):
    # Only files whose name ends in ".BMP" (case-sensitive) are processed.
    if not filename.endswith(".BMP"):
        continue
    image = io.imread(os.path.join(directory, filename))
    v = extract_features(image)
    features.append([filename, 0] + v.tolist())

Write the data to a CSV file¶

# Save the rows collected in `features` for the normal class as CSV.
with open('papsmear-features-normal.csv', 'w', newline='') as csvfile:
    # NOTE(review): `fieldnames` is never handed to the writer, so the file
    # is written without a header line.
    fieldnames = ['filename', 'class', 'features']
    writer = csv.writer(csvfile)
    for row in features:
        writer.writerow(row)

Main function for processing class: severe dysplastic¶

# Folder holding the images of the "severe dysplastic" class (label 1).
directory = "../images/papsmear-data/severe_dysplastic/"

# One row per image: [filename, class label, feature values...].
features = []

# os.listdir() returns entries in arbitrary order; iterating in sorted order
# keeps the rows of the resulting CSV reproducible across runs and machines.
for filename in sorted(os.listdir(directory)):
    # Only files whose name ends in ".BMP" (case-sensitive) are processed.
    if not filename.endswith(".BMP"):
        continue
    image = io.imread(os.path.join(directory, filename))
    v = extract_features(image)
    features.append([filename, 1] + v.tolist())

Write the data to a CSV file¶

# Save the rows collected in `features` for the severe dysplastic class as CSV.
# NOTE(review): this file goes to the parent directory, whereas the
# normal-class CSV is written to the current directory -- confirm intended.
with open('../papsmear-features-displastic.csv', 'w', newline='') as csvfile:
    # NOTE(review): `fieldnames` is never handed to the writer, so the file
    # is written without a header line.
    fieldnames = ['filename', 'class', 'features']
    writer = csv.writer(csvfile)
    for row in features:
        writer.writerow(row)