-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathpreprocessBraTSdataset.m
More file actions
151 lines (129 loc) · 5.05 KB
/
Copy pathpreprocessBraTSdataset.m
File metadata and controls
151 lines (129 loc) · 5.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
function preprocessBraTSdataset(destination,source)
% preprocessBraTSdataset Crop, normalize and split the BraTS data set.
%
%   preprocessBraTSdataset(destination,source) reads every volume/label
%   pair found under [source '/imagesTr'] and [source '/labelsTr'], crops
%   each pair to a box around the labelled tumor (padded by 64 voxels and
%   clipped to the volume), trims the crop so its first three dimensions
%   are multiples of 8, normalizes each channel independently with
%   channelWisePreProcess, and writes the results as MAT-files into
%   training, validation and test folders under destination.
%
%   Inputs:
%     destination - folder that receives the preprocessed MAT-files.
%     source      - folder containing the unzipped 'imagesTr' and
%                   'labelsTr' folders.
%
%   Errors if either of the two source folders is missing.
%
%   Files written (NNN is the zero-padded running index of the pair):
%     <destination>/images{Tr,Val,Test}/BraTSNNN.mat  - variable cropVol
%     <destination>/labels{Tr,Val,Test}/BraTSNNN.mat  - variable cropLabel
%
% Copyright 2018 The MathWorks, Inc.

%% Load data
volLoc = [source filesep 'imagesTr'];
lblLoc = [source filesep 'labelsTr'];
if ~exist(volLoc,'dir') || ~exist(lblLoc,'dir')
    error(['Please unzip Task01_BrainTumour.tar file to ' source])
end

% Files whose names start with "._" would otherwise be picked up by the
% datastores below because they share the '.gz' extension.
moveHiddenFiles(source,volLoc,lblLoc);

% If the directory for preprocessed data does not exist, or only a partial
% set of the data files have been processed, process the data.
if ~exist(destination,'dir') || proceedWithPreprocessing(destination)
    % Create only the output folders that are missing, so that resuming a
    % partial run does not emit a "Directory already exists" warning for
    % each folder.
    outputFolders = {'imagesTr','labelsTr','imagesVal','labelsVal', ...
        'imagesTest','labelsTest'};
    for k = 1:numel(outputFolders)
        outDir = fullfile(destination,outputFolders{k});
        if ~exist(outDir,'dir')
            mkdir(outDir);
        end
    end

    % Labels are binarized on read: any nonzero label value becomes true.
    labelReader = @(x) (niftiread(x) > 0);
    volReader = @(x) niftiread(x);
    volds = imageDatastore(volLoc, ...
        'FileExtensions','.gz','ReadFcn',volReader);
    classNames = ["background","tumor"];
    pixelLabelID = [0 1];
    pxds = pixelLabelDatastore(lblLoc,classNames,pixelLabelID, ...
        'FileExtensions','.gz','ReadFcn',labelReader);
    reset(volds);
    reset(pxds);

    %% Crop relevant region
    NumFiles = length(pxds.Files);
    tol = 64;   % padding, in voxels, added on every side of the box
    id = 1;
    while hasdata(pxds)
        outL = readNumeric(pxds);
        outV = read(volds);
        [nRows,nCols,nPlanes] = size(outL);

        % regionprops3 reports BoundingBox as [x y z width_x width_y
        % width_z], where x runs along columns and y along rows. Only the
        % first region's box is used.
        reg = regionprops3(outL>0,'BoundingBox');
        if isempty(reg)
            % No foreground voxel in this label volume: use a box that
            % covers the whole volume instead of indexing an empty table.
            ROI = [1 1 1 nCols nRows nPlanes];
        else
            ROI = ceil(reg.BoundingBox(1,:));
        end

        % Pad the box and clip it to the volume. The upper limits are
        % listed in the same [x y z] order as the box, i.e. columns first,
        % so that non-square slices are clipped against the right extent.
        ROIst = max(ROI(1:3) - tol, 1);
        ROIend = min(ROI(1:3) + ROI(4:6) + tol, [nCols nRows nPlanes]);

        tumorRows = ROIst(2):ROIend(2);
        tumorCols = ROIst(1):ROIend(1);
        tumorPlanes = ROIst(3):ROIend(3);
        tcropVol = outV(tumorRows,tumorCols,tumorPlanes,:);
        tcropLabel = outL(tumorRows,tumorCols,tumorPlanes);

        % Data set with a valid size for 3-D U-Net (multiple of 8)
        ind = floor(size(tcropVol)/8)*8;
        incropVol = tcropVol(1:ind(1),1:ind(2),1:ind(3),:);

        % Remember which voxels were exactly zero before normalization so
        % they can be forced back to zero afterwards.
        mask = incropVol == 0;
        cropVol = channelWisePreProcess(incropVol);

        % Set the nonbrain region to 0
        cropVol(mask) = 0;
        cropLabel = tcropLabel(1:ind(1),1:ind(2),1:ind(3));

        % Split data into training, validation and test sets. Roughly 82%
        % are training, 6% are validation, and 12% are test
        if (id < floor(0.83*NumFiles))
            imDir = fullfile(destination,'imagesTr','BraTS');
            labelDir = fullfile(destination,'labelsTr','BraTS');
        elseif (id < floor(0.89*NumFiles))
            imDir = fullfile(destination,'imagesVal','BraTS');
            labelDir = fullfile(destination,'labelsVal','BraTS');
        else
            imDir = fullfile(destination,'imagesTest','BraTS');
            labelDir = fullfile(destination,'labelsTest','BraTS');
        end

        % save stores the variables by name, so the names cropVol and
        % cropLabel are part of the on-disk format.
        save([imDir num2str(id,'%.3d') '.mat'],'cropVol');
        save([labelDir num2str(id,'%.3d') '.mat'],'cropLabel');
        id = id + 1;
    end
end
end
function out = channelWisePreProcess(in)
% channelWisePreProcess Standardize each channel and rescale to [0, 1].
%
%   out = channelWisePreProcess(in) treats the 4th dimension of IN as the
%   channel (modality) dimension. For every channel it subtracts the mean
%   and divides by the standard deviation taken over dimensions 1-3, clips
%   the result to [-5, 5] and maps that range linearly onto [0, 1].
%
%   Input:
%     in  - numeric array; statistics are taken over dimensions 1, 2 and 3.
%           Non-floating-point input is converted to single first.
%   Output:
%     out - array of the same size as IN with values in [0, 1]. A channel
%           whose values are all identical maps to 0.5 everywhere.

% std does not accept integer classes, and integer arrays cannot be
% combined with a non-scalar double mean, so work in floating point.
if ~isfloat(in)
    in = single(in);
end

chn_Mean = mean(in,[1 2 3]);
chn_Std = std(in,0,[1 2 3]);

% A constant channel has zero standard deviation; dividing by it would turn
% the whole channel into NaN (0/0). The numerator is already zero there, so
% any nonzero divisor gives the intended result of 0.
chn_Std(chn_Std == 0) = 1;

out = (in - chn_Mean)./chn_Std;

rangeMin = -5;
rangeMax = 5;

% Remove outliers. Logical indexing (rather than min/max) keeps any NaN
% that was already present in the input as NaN.
out(out > rangeMax) = rangeMax;
out(out < rangeMin) = rangeMin;

% Rescale the data to the range [0, 1]
out = (out - rangeMin) / (rangeMax - rangeMin);
end
function moveHiddenFiles(source,volLoc,lblLoc)
% moveHiddenFiles Move "._*" files out of the image and label folders.
%
%   moveHiddenFiles(source,volLoc,lblLoc) moves every file whose name
%   begins with "._" from volLoc and from lblLoc into the folder
%   fullfile(source,'HiddenFiles').
%
%   The move is performed only when the 'HiddenFiles' folder does not exist
%   yet; if it already exists the function does nothing.
%
%   Inputs:
%     source - folder in which 'HiddenFiles' is created.
%     volLoc - folder holding the image volumes.
%     lblLoc - folder holding the label volumes.
%
%   The files are moved with movefile on full paths, so the function works
%   without a Unix shell and never changes the current working directory.
%   A file that cannot be moved produces a warning and is skipped.

hiddenDir = fullfile(source,'HiddenFiles');
if exist(hiddenDir,'dir')
    return
end
mkdir(hiddenDir);

dataDirs = {volLoc,lblLoc};
for d = 1:numel(dataDirs)
    listing = dir(fullfile(dataDirs{d},'._*'));
    listing = listing(~[listing.isdir]);
    for f = 1:numel(listing)
        hiddenFile = fullfile(dataDirs{d},listing(f).name);
        % Requesting the status outputs keeps a failed move from throwing,
        % so one bad file does not abort the whole preprocessing run.
        [moved,msg] = movefile(hiddenFile,hiddenDir);
        if ~moved
            warning('Could not move %s: %s',hiddenFile,msg);
        end
    end
end
end
function out = proceedWithPreprocessing(destination)
% proceedWithPreprocessing Report whether preprocessing still has to run.
%
%   out = proceedWithPreprocessing(destination) counts the non-folder
%   entries found in the 'imagesTr', 'imagesVal' and 'imagesTest' folders
%   under destination (folders that do not exist contribute nothing) and
%   returns true when that count differs from the expected total of 484
%   files, false when it matches.

expectedCount = 484;
imageFolders = {'imagesTr','imagesVal','imagesTest'};

foundCount = 0;
for k = 1:numel(imageFolders)
    folderPath = fullfile(destination,imageFolders{k});
    if ~exist(folderPath,'dir')
        continue
    end
    entries = dir(folderPath);
    % dir also lists '.' and '..' and any subfolders; count files only.
    foundCount = foundCount + sum(~vertcat(entries.isdir));
end

% A matching count means preprocessing has already been completed and can
% be skipped; any other count means it has to be (re)done.
out = foundCount ~= expectedCount;
end