Kaggle RSNA Intracranial Hemorrhage Detection
This project is my submission to the 2019 Kaggle RSNA Intracranial Hemorrhage Detection competition, which placed me in the top 11% of 1,345 teams.
By the time I joined the competition there were already many great notebooks exploring the data and providing ideas for baseline models. I used the notebook "InceptionV3 (prev. ResNet50) Keras baseline model" by akensert as a starting point, and most of the ideas and methods for data cleaning and preparation were taken from Jeremy Howard's series of notebooks on the FastAI implementation for this competition.
A surprising discovery for me during this competition was a new approach to ensembling: averaging the predictions of a single model across epochs instead of combining the final-epoch predictions of several models. I understood the intuition behind it, but I didn't expect it to work so well. Since I didn't have enough compute power to build and train a complex ensemble, I adopted this approach and combined it with an ensemble of three models over a limited number of epochs.
Based on cross-validation this gave a significant boost over single-model averaging, so I used it for the second stage of the competition. I barely had enough time to finish 4 folds before the deadline, and I had to use smaller images, just 256px.
Larger images would undoubtedly improve the score. I also believe that a traditional K-fold approach with an ensemble of several models would be more accurate (I plan to check this once I get access to better hardware).
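To make the averaging scheme concrete, here is a minimal sketch with hypothetical prediction arrays (not the actual pipeline below): the predictions from the last few epochs of each model are averaged first, and the per-model averages are then averaged again.
import numpy as np
# one (n_samples, 6) prediction array per saved epoch of a single model (toy data)
epoch_preds = [np.random.rand(10, 6) for _ in range(2)]
model_a_avg = np.mean(epoch_preds, axis=0)            # average across epochs of model A
# repeat for the other models, then average the per-model averages
model_b_avg, model_c_avg = np.random.rand(10, 6), np.random.rand(10, 6)
final_preds = np.mean([model_a_avg, model_b_avg, model_c_avg], axis=0)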
Project code
import numpy as np
import pandas as pd
import sys
import os
from math import ceil, floor, log
import cv2
import keras
from keras import backend as K
from imgaug import augmenters as iaa
import pydicom
from pydicom.dataset import Dataset as DcmDataset
import pickle
from efficientnet.keras import EfficientNetB0, EfficientNetB2
from keras_applications.inception_v3 import InceptionV3
from sklearn.model_selection import ShuffleSplit, train_test_split
test_images_dir = 'stage_2_test_images/'
train_images_dir = 'stage_2_train_images/'
train_1 = "stage_1_train.csv"
train_2 = "stage_2_train.csv"
test1_labels = 'stage_1_sample_submission.csv'
test2_labels = 'stage_2_sample_submission.csv'
# set to True to run on the stage 2 datasets, False for stage 1
stage2 = True
# Preprocessing: correct DICOM metadata, apply CT windowing to the pixel values and map to a 3-channel (RGB-like) image
#---------------------------------------------------------------------------------------------
def correct_dcm(dcm):
x = dcm.pixel_array + 1000
px_mode = 4096
x[x>=px_mode] = x[x>=px_mode] - px_mode
dcm.PixelData = x.tobytes()
dcm.RescaleIntercept = -1000
def window_image(dcm, window_center, window_width):
if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
correct_dcm(dcm)
img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
img_min = window_center - window_width // 2
img_max = window_center + window_width // 2
img = np.clip(img, img_min, img_max)
return img
def bsb_window(dcm):
brain_img = window_image(dcm, 40, 80)
subdural_img = window_image(dcm, 80, 200)
soft_img = window_image(dcm, 40, 380)
brain_img = (brain_img - 0) / 80
subdural_img = (subdural_img - (-20)) / 200
soft_img = (soft_img - (-150)) / 380
bsb_img = np.array([brain_img, subdural_img, soft_img]).transpose(1,2,0)
return bsb_img
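# Illustrative sanity check (not called anywhere in the pipeline, purely synthetic data):
# after windowing and the per-channel scaling above, each channel lands in [0, 1].
# Shown here for the brain window (center 40, width 80); the same math applies to the other channels.
def _example_window_scaling():
    fake_hu = np.linspace(-1000, 1000, 10000).reshape(100, 100)   # synthetic HU slice
    center, width = 40, 80                                        # brain window
    clipped = np.clip(fake_hu, center - width // 2, center + width // 2)
    normed = (clipped - (center - width // 2)) / width            # same scaling as brain_img above
    assert normed.min() == 0.0 and normed.max() == 1.0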
# check what fraction of the image lies inside the brain window, i.e. likely reflects brain tissue
#---------------------------------------------------------------------------------------------
def brain_in_window(dcm:DcmDataset, window_center = 40 , window_width = 80):
"% of pixels in the window reflecting brain matter "
if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
correct_dcm(dcm)
px = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
return ((px > window_center-window_width//2) & (px < window_center+window_width//2)).mean().item()
#---------------------------------------------------------------------------------------------
# Image Augmentation
sometimes = lambda aug: iaa.Sometimes(0.35, aug)
augmentation = iaa.Sequential([
iaa.Fliplr(0.5),
sometimes(iaa.Crop(px=(20, 80), keep_size = True, sample_independently = False)),
sometimes(iaa.Affine(rotate=(-35, 35)))
], random_order = True)
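# Quick illustration (not used by the pipeline): the augmenter preserves the image shape
# (Crop uses keep_size=True), so it can be applied before any resizing. Synthetic image, hypothetical size.
def _example_augmentation():
    dummy = np.random.rand(256, 256, 3).astype(np.float32)   # stand-in for a windowed 3-channel image
    augmented = augmentation.augment_image(dummy)
    assert augmented.shape == dummy.shape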
#---------------------------------------------------------------------------------------------
def _read(path, desired_size, toAugment = True):
    """Read a DICOM file, window it into a 3-channel image and optionally augment it (used by DataGenerator)."""
    dcm = pydicom.dcmread(path)
    try:
        img = bsb_window(dcm)
    except Exception:
        # unreadable or non-standard file: fall back to a blank image of the requested size
        img = np.zeros(desired_size)
if toAugment:
img = augmentation.augment_image(img)
if desired_size[0] != img.shape[0]:
img = cv2.resize(img, desired_size[:2], interpolation=cv2.INTER_LINEAR)
return img
#---------------------------------------------------------------------------------------------
class DataGenerator(keras.utils.Sequence):
def __init__(self, list_IDs, labels=None, batch_size=1, img_size=(512, 512, 1),
img_dir=train_images_dir, testAugment = False,
*args, **kwargs):
self.list_IDs = list_IDs
self.labels = labels
self.batch_size = batch_size
self.img_size = img_size
self.img_dir = img_dir
self.testAugment = testAugment
self.on_epoch_end()
def __len__(self):
return int(ceil(len(self.indices) / self.batch_size))
def __getitem__(self, index):
indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
list_IDs_temp = [self.list_IDs[k] for k in indices]
if self.labels is not None:
X, Y = self.__data_generation(list_IDs_temp)
return X, Y
else:
X = self.__data_generation(list_IDs_temp)
return X
def on_epoch_end(self):
        if self.labels is not None:  # training phase: undersample and shuffle every epoch
            # keep negatives ('any' = 0) with probability 0.35 and positives ('any' = 1) with probability 0.5
            keep_prob = self.labels.iloc[:, 0].map({0: 0.35, 1: 0.5})
keep = (keep_prob > np.random.rand(len(keep_prob)))
self.indices = np.arange(len(self.list_IDs))[keep]
np.random.shuffle(self.indices)
else:
self.indices = np.arange(len(self.list_IDs))
def __data_generation(self, list_IDs_temp):
X = np.empty((self.batch_size, *self.img_size))
if self.labels is not None: # training phase
Y = np.empty((self.batch_size, 6), dtype=np.float32)
for i, ID in enumerate(list_IDs_temp):
X[i,] = _read(self.img_dir+ID+".dcm", self.img_size, toAugment = True)
Y[i,] = self.labels.loc[ID].values
return X, Y
elif self.testAugment: # test phase with Augmentation
for i, ID in enumerate(list_IDs_temp):
X[i,] = _read(self.img_dir+ID+".dcm", self.img_size, toAugment = True)
return X
else: # test phase no Augmentation
for i, ID in enumerate(list_IDs_temp):
X[i,] = _read(self.img_dir+ID+".dcm", self.img_size, toAugment = False)
return X
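# Sketch of the undersampling done in on_epoch_end() above (not part of the pipeline, toy labels):
# every epoch a fresh random subset is drawn, keeping ~35% of the negatives and ~50% of the
# positives of the 'any' label, which rebalances the classes seen by the model.
def _example_undersampling():
    toy_any = pd.Series(np.random.randint(0, 2, size=1000))      # fake 'any' labels, roughly balanced
    keep_prob = toy_any.map({0: 0.35, 1: 0.5})
    keep = keep_prob > np.random.rand(len(keep_prob))
    return np.arange(len(toy_any))[keep]                          # ~425 of 1000 indices kept on average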
#---------------------------------------------------------------------------------------------
def weighted_log_loss(y_true, y_pred):
"""
Can be used as the loss function in model.compile()
---------------------------------------------------
"""
class_weights = np.array([2., 1., 1., 1., 1., 1.])
eps = K.epsilon()
y_pred = K.clip(y_pred, eps, 1.0-eps)
out = -( y_true * K.log( y_pred) * class_weights
+ (1.0 - y_true) * K.log(1.0 - y_pred) * class_weights)
return K.mean(out, axis=-1)
def _normalized_weighted_average(arr, weights=None):
"""
A simple Keras implementation that mimics that of
numpy.average(), specifically for this competition
"""
if weights is not None:
scl = K.sum(weights)
weights = K.expand_dims(weights, axis=1)
return K.sum(K.dot(arr, weights), axis=1) / scl
return K.mean(arr, axis=1)
def weighted_loss(y_true, y_pred):
"""
Will be used as the metric in model.compile()
---------------------------------------------
Similar to the custom loss function 'weighted_log_loss()' above
but with normalized weights, which should be very similar
to the official competition metric:
https://www.kaggle.com/kambarakun/lb-probe-weights-n-of-positives-scoring
and hence:
sklearn.metrics.log_loss with sample weights
"""
class_weights = K.variable([2., 1., 1., 1., 1., 1.])
eps = K.epsilon()
y_pred = K.clip(y_pred, eps, 1.0-eps)
loss = -( y_true * K.log( y_pred)
+ (1.0 - y_true) * K.log(1.0 - y_pred))
loss_samples = _normalized_weighted_average(loss, class_weights)
return K.mean(loss_samples)
def weighted_log_loss_metric(trues, preds):
"""
Will be used to calculate the log loss
of the validation set in PredictionCheckpoint()
------------------------------------------
"""
class_weights = [2., 1., 1., 1., 1., 1.]
epsilon = 1e-7
preds = np.clip(preds, epsilon, 1-epsilon)
loss = trues * np.log(preds) + (1 - trues) * np.log(1 - preds)
loss_samples = np.average(loss, axis=1, weights=class_weights)
return - loss_samples.mean()
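# Tiny worked example (synthetic values, not called in the pipeline): the validation metric is a
# weighted multi-label log loss where the 'any' label counts twice as much as each subtype.
def _example_metric():
    trues = np.array([[1., 0., 0., 0., 1., 0.],
                      [0., 0., 0., 0., 0., 0.]])
    preds = np.array([[0.9, 0.1, 0.1, 0.1, 0.8, 0.1],
                      [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]])
    return weighted_log_loss_metric(trues, preds)   # small positive value for good predictions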
#---------------------------------------------------------------------------------------------
class PredictionCheckpoint(keras.callbacks.Callback):
def __init__(self, test_df, valid_df,
test_images_dir=test_images_dir,
valid_images_dir=train_images_dir,
batch_size=32, input_size=(224, 224, 3)):
self.test_df = test_df
self.valid_df = valid_df
self.test_images_dir = test_images_dir
self.valid_images_dir = valid_images_dir
self.batch_size = batch_size
self.input_size = input_size
def on_train_begin(self, logs={}):
self.test_predictions = []
self.valid_predictions = []
def on_epoch_end(self,epoch, logs={}):
print('End of Epoch #{}'.format(epoch+1))
        if epoch >= 3:  # collect test predictions only from the later epochs (index 3 and up)
            # 'direct' prediction (no test-time augmentation)
self.test_predictions.append(
self.model.predict_generator(
DataGenerator(self.test_df.index, None, self.batch_size, self.input_size, self.test_images_dir, testAugment = False),
use_multiprocessing=False,
workers=4,
verbose=1)[:len(self.test_df)])
            # add 3 test-time augmented (TTA) predictions of the test set
for i in range(3):
self.test_predictions.append(
self.model.predict_generator(
DataGenerator(self.test_df.index, None, self.batch_size, self.input_size, self.test_images_dir, testAugment = True),
use_multiprocessing=False,
workers=4,
verbose=1)[:len(self.test_df)])
else:
print('Skipped predictions...')
        # Note: validation predictions are skipped here to save time and computed separately after each fold ends.
        # Ideally they would be done here as well, for more control and predictability; that just needs more compute power.
#---------------------------------------------------------------------------------------------
class MyDeepModel:
def __init__(self, engine, input_dims, batch_size=5, num_epochs=4, learning_rate=1e-3,
decay_rate=1.0, decay_steps=1, weights="imagenet", verbose=1):
self.engine = engine
self.input_dims = input_dims
self.batch_size = batch_size
self.num_epochs = num_epochs
self.learning_rate = learning_rate
self.decay_rate = decay_rate
self.decay_steps = decay_steps
self.weights = weights
self.verbose = verbose
self._build()
def _build(self):
engine = self.engine(include_top=False, weights=self.weights, input_shape=self.input_dims,
backend = keras.backend, layers = keras.layers,
models = keras.models, utils = keras.utils)
x = keras.layers.GlobalAveragePooling2D(name='avg_pool')(engine.output)
x = keras.layers.Dropout(0.2)(x)
out = keras.layers.Dense(6, activation="sigmoid", name='dense_output')(x)
self.model = keras.models.Model(inputs=engine.input, outputs=out)
self.model.compile(loss="binary_crossentropy", optimizer=keras.optimizers.Adam(), metrics=[weighted_loss])
def fit_and_predict(self, train_df, valid_df, test_df):
# callbacks
pred_history = PredictionCheckpoint(test_df, valid_df, input_size=self.input_dims)
scheduler = keras.callbacks.LearningRateScheduler(lambda epoch: self.learning_rate * pow(self.decay_rate, floor(epoch / self.decay_steps)))
self.model.fit_generator(
DataGenerator(
train_df.index,
train_df,
self.batch_size,
self.input_dims,
train_images_dir
),
epochs=self.num_epochs,
verbose=self.verbose,
use_multiprocessing=False,
workers=4,
callbacks=[pred_history, scheduler]
)
return pred_history
def save(self, path):
self.model.save_weights(path)
def load(self, path):
self.model.load_weights(path)
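# Illustration of the step-decay schedule used in fit_and_predict() (not called anywhere):
# lr(epoch) = learning_rate * decay_rate ** floor(epoch / decay_steps).
# With the settings used below (5e-4, decay_rate 0.8, decay_steps 1) the learning rate shrinks
# by 20% every epoch: 5e-4, 4e-4, 3.2e-4, ...
def _example_lr_schedule(learning_rate = 5e-4, decay_rate = 0.8, decay_steps = 1, num_epochs = 5):
    return [learning_rate * pow(decay_rate, floor(epoch / decay_steps)) for epoch in range(num_epochs)]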
#---------------------------------------------------------------------------------------------
def read_testset(stage2):
if stage2 :
df = pd.read_csv(test2_labels)
else:
df = pd.read_csv(test1_labels)
df["Image"] = df["ID"].str.slice(stop=12)
df["Diagnosis"] = df["ID"].str.slice(start=13)
df = df.loc[:, ["Label", "Diagnosis", "Image"]]
df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)
return df
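# Illustration of the ID -> (Image, Diagnosis) reshaping above (toy data, not part of the pipeline):
# every raw row 'ID_<image>_<diagnosis>' becomes one cell of a wide frame indexed by image,
# with one 'Label' column per diagnosis.
def _example_unstack():
    toy = pd.DataFrame({'ID': ['ID_000000001_any', 'ID_000000001_epidural',
                               'ID_000000002_any', 'ID_000000002_epidural'],
                        'Label': [1, 0, 0, 0]})
    toy['Image'] = toy['ID'].str.slice(stop=12)        # 'ID_' plus the 9-character image id
    toy['Diagnosis'] = toy['ID'].str.slice(start=13)
    toy = toy.loc[:, ['Label', 'Diagnosis', 'Image']]
    return toy.set_index(['Image', 'Diagnosis']).unstack(level=-1)   # 2 rows x 2 'Label' columns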
#---------------------------------------------------------------------------------------------
def read_trainset(Stage2):
if Stage2:
df = pd.read_csv(train_2)
else:
df = pd.read_csv(train_1)
duplicates_to_remove = df[df.duplicated('ID', keep = 'first')].index.tolist()
df["Image"] = df["ID"].str.slice(stop=12)
df["Diagnosis"] = df["ID"].str.slice(start=13)
df = df.drop(index=duplicates_to_remove)
df = df.reset_index(drop=True)
df = df.loc[:, ["Label", "Diagnosis", "Image"]]
df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)
return df
#---------------------------------------------------------------------------------------------
# drop images where the fraction of brain tissue (per brain_in_window) is below the 2% threshold
def discard_no_brain():
    """Scan every training image (global df), compute the share of pixels inside the brain
    window and save the image IDs that exceed the 2% threshold to 'with_brain.csv'."""
    all_labels = df.index.tolist()
    pcts = []
    bad_files = []                      # files that could not be read or windowed
    for n_file in all_labels:
        try:
            dicom = pydicom.dcmread(train_images_dir + n_file + '.dcm')
            pct = brain_in_window(dicom, 40, 80)
        except Exception:
            pct = -1
            print(n_file, '\n')
            bad_files.append(n_file)
        pcts.append(pct)
    useful = pd.DataFrame()
    useful['label'] = all_labels
    useful['pct'] = pcts
    uf = useful.loc[useful.pct > 0.02]
    uf.to_csv('with_brain.csv')
test_df = read_testset(stage2)
df = read_trainset(stage2)
discard_no_brain()
uf = pd.read_csv('with_brain.csv')
df_with_brain = df.loc[uf.label]
BS = 24
# -------------------------------------------------------------------------------------------------------------------------
model_EfficientNetB2 = MyDeepModel(engine=EfficientNetB2, input_dims=(256, 256, 3), batch_size=BS, learning_rate=5e-4,
num_epochs=5, decay_rate=0.8, decay_steps=1, weights="imagenet", verbose=1)
model_InceptionV3 = MyDeepModel(engine=InceptionV3, input_dims=(256, 256, 3), batch_size=32, learning_rate=5e-4,
num_epochs=5, decay_rate=0.8, decay_steps=1, weights="imagenet", verbose=1)
model_EfficientNetB0 = MyDeepModel(engine=EfficientNetB0, input_dims=(256, 256, 3), batch_size=32, learning_rate=5e-4,
num_epochs=5, decay_rate=0.8, decay_steps=1, weights="imagenet", verbose=1)
# -------------------------------------------------------------------------------------------------------------------------
# service function to call fit on an already initialized model, with different params if needed (the pipeline below calls fit_and_predict directly)
def fit_and_predict_wrap(model, train_df, valid_df, test_df, batch_size = 32, num_epochs = 5, verbose = 1):
# callbacks
pred_history = PredictionCheckpoint(test_df, valid_df, input_size = model.input_dims)
scheduler = keras.callbacks.LearningRateScheduler(lambda epoch: model.learning_rate * pow(model.decay_rate, floor(epoch / model.decay_steps)))
model.model.fit_generator(
DataGenerator(
train_df.index,
train_df,
batch_size,
model.input_dims,
train_images_dir
),
epochs = num_epochs,
verbose = verbose,
use_multiprocessing=False,
workers=4,
callbacks=[pred_history, scheduler]
)
return pred_history
# -------------------------------------------------------------------------------------------------------------------------
def fit_predict_save(model_T, model_name, fold_num):
    """Train the given model on the current fold (globals train_idx / valid_idx), store its test
    predictions in the global history_new list and save the weights to disk."""
    history = model_T.fit_and_predict(df_new.iloc[train_idx], df_new.iloc[valid_idx], test_df)
history_new.append(history.test_predictions)
model_T.save((f'St2-{model_name}-5epochs-{fold_num}_fold-4tta-256.h5'))
with open((f'history_{fold_num}folds.pickle'), 'wb') as f:
pickle.dump(history_new, f, pickle.HIGHEST_PROTOCOL)
# -------------------------------------------------------------------------------------------------------------------------
# validate on the hold-out set the same way as the test-set prediction: one direct pass plus 3 TTA passes, then average
def valid_predictions(model_T):
preds = []
preds.append(model_T.model.predict_generator(
DataGenerator(X_val.index, None, 16, (256, 256, 3), train_images_dir, testAugment = False),
use_multiprocessing=False,
workers=8,
verbose=1)[:len(X_val)])
for i in range(3):
preds.append(model_T.model.predict_generator(
DataGenerator(X_val.index, None, 16, (256, 256, 3), train_images_dir, testAugment = True),
use_multiprocessing=False,
workers=8,
verbose=1)[:len(X_val)])
val = np.average(preds, axis=0)
return val
# -------------------------------------------------------------------------------------------------------------------------
X_train, X_val = train_test_split(df_with_brain, test_size=0.1, random_state=1970)
df_new = X_train.copy()
ss_new = ShuffleSplit(n_splits=4, test_size=0.2, random_state=1970).split(df_new.index)
history_new = []
valid_history = []
num_folds = 4  # must not exceed n_splits in the ShuffleSplit above
for cnt in range(1,num_folds+1):
print('* * * * * * * * * * * * * * * * ')
print(f'* * * Fold * * * #{cnt}')
train_idx, valid_idx = next(ss_new)
fit_predict_save(model_EfficientNetB2, 'B2', cnt)
fit_predict_save(model_InceptionV3, 'V3', cnt)
fit_predict_save(model_EfficientNetB0, 'B0', cnt)
V_B0 = valid_predictions(model_EfficientNetB0)
val_B0 = weighted_log_loss_metric(X_val.values, V_B0)
V_B2 = valid_predictions(model_EfficientNetB2)
val_B2 = weighted_log_loss_metric(X_val.values, V_B2)
V_V3 = valid_predictions(model_InceptionV3)
val_V3 = weighted_log_loss_metric(X_val.values, V_V3)
all3_val = [V_B0 , V_B2 , V_V3]
val_avg3 = np.average(all3_val, axis=0)
val_ALL3 = weighted_log_loss_metric(X_val.values, val_avg3)
valid_history.append({'B0':val_B0})
valid_history.append({'B2':val_B2})
valid_history.append({'V3':val_V3})
valid_history.append({'avg_3':val_ALL3})
with open('history_validation.pickle', 'wb') as f:
pickle.dump(valid_history, f, pickle.HIGHEST_PROTOCOL)
# Average predictions within each training run (over the saved epochs and TTA passes), then across all models and folds
shapes = history_new[0][0].shape
folds_preds = np.zeros((len(history_new),shapes[0],shapes[1]))
for i in range(len(history_new)):
folds_preds[i] = np.average(history_new[i], axis=0)
averaged_preds = np.average(folds_preds, axis=0)
# Reshape the wide predictions back into the long submission format (one row per 'Image_Diagnosis' ID)
test_df_pred = test_df.copy()
test_df_pred.iloc[:, :] = averaged_preds
test_df_pred = test_df_pred.stack().reset_index()
test_df_pred.insert(loc=0, column='ID', value=test_df_pred['Image'].astype(str) + "_" + test_df_pred['Diagnosis'])
test_df_pred = test_df_pred.drop(["Image", "Diagnosis"], axis=1)
test_df_pred.to_csv('stage2_b2v3b0_5e-4folds-2_last_epochs-4TTA-256.csv', index=False)