From 5f97f6ff02d945d15c78857c317fa2f5df0421f7 Mon Sep 17 00:00:00 2001 From: Anna Petrovicheva Date: Sat, 4 Apr 2020 23:33:38 +0300 Subject: [PATCH 1/5] T-SNE on Animals10 dataset --- TSNE/animals_dataset.py | 91 +++++++++++++++++ TSNE/resnet.py | 37 +++++++ TSNE/tsne.py | 211 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 339 insertions(+) create mode 100644 TSNE/animals_dataset.py create mode 100644 TSNE/resnet.py create mode 100644 TSNE/tsne.py diff --git a/TSNE/animals_dataset.py b/TSNE/animals_dataset.py new file mode 100644 index 000000000..358703a57 --- /dev/null +++ b/TSNE/animals_dataset.py @@ -0,0 +1,91 @@ +from os import path, listdir +import torch +from torchvision import transforms +import random + +from PIL import Image, ImageFile +ImageFile.LOAD_TRUNCATED_IMAGES = True + + +colors_per_class = { + 'dog' : (254, 202, 87), + 'horse' : (255, 107, 107), + 'elephant' : (10, 189, 227), + 'butterfly' : (255, 159, 243), + 'chicken' : (16, 172, 132), + 'cat' : (52, 31, 151), + 'cow' : (0, 210, 211), + 'sheep' : (84, 160, 255), + 'spider' : (87, 101, 116), + 'squirrel' : (200, 214, 229), +} + + +# processes Animals10 dataset: https://www.kaggle.com/alessiocorrado99/animals10 +class AnimalsDataset(torch.utils.data.Dataset): + def __init__(self, data_path, num_images=1000): + translation = {'cane' : 'dog', + 'cavallo' : 'horse', + 'elefante' : 'elephant', + 'farfalla' : 'butterfly', + 'gallina' : 'chicken', + 'gatto' : 'cat', + 'mucca' : 'cow', + 'pecora' : 'sheep', + 'ragno' : 'spider', + 'scoiattolo' : 'squirrel'} + + self.classes = translation.values() + + if not path.exists(data_path): + raise Exception(data_path + ' does not exist!') + + self.data = [] + + folders = listdir(data_path) + for folder in folders: + label = translation[folder] + + full_path = path.join(data_path, folder) + images = listdir(full_path) + + current_data = [(path.join(full_path, image), label) for image in images] + self.data += current_data + + num_images = min(num_images, len(self.data)) + self.data = random.sample(self.data, num_images) # only use num_images images + + self.transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + + def __len__(self): + return len(self.data) + + + def __getitem__(self, index): + image_path, label = self.data[index] + + image = Image.open(image_path) + + try: + image = self.transform(image) # some images in the dataset cannot be processed - we'll skip them + except Exception: + return None + + dict_data = { + 'image' : image, + 'label' : label, + 'image_path' : image_path + } + return dict_data + + +# Skips empty samples in a batch +def collate_skip_empty(batch): + batch = [sample for sample in batch if sample] # check that sample is not None + return torch.utils.data.dataloader.default_collate(batch) diff --git a/TSNE/resnet.py b/TSNE/resnet.py new file mode 100644 index 000000000..ab54cffaf --- /dev/null +++ b/TSNE/resnet.py @@ -0,0 +1,37 @@ +import torch +from torchvision import models +from torch.hub import load_state_dict_from_url + + +# Define the architecture by modifying resnet. +# Original code is here +# https://github.com/pytorch/vision/blob/b2e95657cd5f389e3973212ba7ddbdcc751a7878/torchvision/models/resnet.py +class Resnet(models.ResNet): + def __init__(self, num_classes=1000, pretrained=True, **kwargs): + # Start with standard resnet50 defined here + # https://github.com/pytorch/vision/blob/b2e95657cd5f389e3973212ba7ddbdcc751a7878/torchvision/models/resnet.py + super().__init__(block=models.resnet.Bottleneck, layers=[3, 4, 23, 3], num_classes=num_classes, **kwargs) + if pretrained: + state_dict = load_state_dict_from_url(models.resnet.model_urls['resnet101'], progress=True) + self.load_state_dict(state_dict) + + # Reimplementing forward pass. + # Replacing the following code + # https://github.com/pytorch/vision/blob/b2e95657cd5f389e3973212ba7ddbdcc751a7878/torchvision/models/resnet.py#L197-L213 + def _forward_impl(self, x): + # Standard forward for resnet + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + # Notice there is no forward pass through the original classifier. + x = self.avgpool(x) + x = torch.flatten(x, 1) + + return x diff --git a/TSNE/tsne.py b/TSNE/tsne.py new file mode 100644 index 000000000..8980f5ac2 --- /dev/null +++ b/TSNE/tsne.py @@ -0,0 +1,211 @@ +import argparse +from tqdm import tqdm +import cv2 +import torch +import random +import numpy as np +from sklearn.manifold import TSNE + +from animals_dataset import AnimalsDataset, collate_skip_empty, colors_per_class +from resnet import Resnet + + +def fix_random_seeds(): + seed = 42 + random.seed(seed) + torch.manual_seed(seed) + np.random.seed(seed) + + +def get_model_outputs(dataset, batch, num_images): + # move the input and model to GPU for speed if available + if torch.cuda.is_available(): + device = 'cuda' + else: + device = 'cpu' + + # initialize our implementation of ResNet + model = Resnet(pretrained=True) + model.eval() + model.to(device) + + # read the dataset and initialize the data loader + dataset = AnimalsDataset(dataset, num_images) + dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch, collate_fn=collate_skip_empty, shuffle=True) + + # we'll store the features as NumPy array of size num_images x feature_size + outputs = None + + # we'll also store the image labels and paths to visualize them later + labels = [] + image_paths = [] + + for batch in tqdm(dataloader, desc='Running the model inference'): + images = batch['image'].to(device) + labels += batch['label'] + image_paths += batch['image_path'] + + with torch.no_grad(): + output = model.forward(images) + + current_outputs = output.cpu().numpy() + if outputs is not None: + outputs = np.concatenate((outputs, current_outputs)) + else: + outputs = current_outputs + + return outputs, labels, image_paths + + +def plot_legend(colors_per_class): + width = 300 + + offset = 30 + color_size = 30 + color_offset = 5 + + row_height = color_size + 2 * color_offset + num_colors = len(colors_per_class) + + height = row_height * num_colors + + legend = np.zeros((height, width, 3), np.uint8) + legend.fill(255) + + for i, label in enumerate(sorted(colors_per_class)): + color = colors_per_class[label] + + tl_x = offset + 1 + tl_y = row_height * i + color_offset + + br_x = tl_x + color_size + br_y = tl_y + color_size + + legend = cv2.rectangle( + legend, + (tl_x, tl_y), + (br_x, br_y), + color=color, + thickness=cv2.FILLED + ) + legend = cv2.putText( + legend, + label, + (br_x + offset, br_y - 2 * color_offset), + fontFace=cv2.FONT_HERSHEY_TRIPLEX, + fontScale=0.5, + color=(0, 0, 0) + ) + + cv2.imshow('legend', legend) + + +def scale_to_01_range(x): + value_range = (np.max(x) - np.min(x)) + starts_from_zero = x - np.min(x) + return starts_from_zero / value_range + + +def scale_image(image, max_image_size): + image_height, image_width, _ = image.shape + + scale = max(1, image_width / max_image_size, image_height / max_image_size) + image_width = int(image_width / scale) + image_height = int(image_height / scale) + + image = cv2.resize(image, (image_width, image_height)) + return image + + +def draw_rectangle_by_class(image, label): + image_height, image_width, _ = image.shape + + # get the color corresponding to image class + color = colors_per_class[label] + image = cv2.rectangle(image, (0, 0), (image_width - 1, image_height - 1), color=color, thickness=5) + + return image + + +def compute_plot_coordinates(image, x, y, image_centers_area_size, offset): + image_height, image_width, _ = image.shape + + # compute the image center coordinates on the plot + center_x = int(image_centers_area_size * x) + offset + center_y = int(image_centers_area_size * y) + offset + + # knowing the image center, compute the coordinates of the top left and bottom right corner + tl_x = center_x - int(image_width / 2) + tl_y = center_y - int(image_height / 2) + + br_x = tl_x + image_width + br_y = tl_y + image_height + + return tl_x, tl_y, br_x, br_y + + +def visualize_tsne_plot(tsne, images, labels, plot_size=1000, max_image_size=100): + # extract x and y coordinates representing the positions of the images on T-SNE plot + tx = tsne[:, 0] + ty = tsne[:, 1] + + # scale and move the coordinates so they fit [0; 1] range + tx = scale_to_01_range(tx) + ty = scale_to_01_range(ty) + + # we'll put the image centers in the central area of the plot + # and use offsets to make sure the images fit the plot + offset = max_image_size // 2 + image_centers_area_size = plot_size - 2 * offset + + tsne_plot = np.zeros((plot_size, plot_size, 3), np.uint8) + tsne_plot.fill(255) + + # now we'll put a small copy of every image to its corresponding T-SNE coordinate + for image_path, label, x, y in tqdm( + zip(images, labels, tx, ty), + desc='Building the T-SNE plot', + total=len(images) + ): + image = cv2.imread(image_path) + + # scale the image to put it to the plot + image = scale_image(image, max_image_size) + + # draw a rectangle with a color corresponding to the image class + image = draw_rectangle_by_class(image, label) + + # compute the coordinates of the image on the scaled plot visualization + tl_x, tl_y, br_x, br_y = compute_plot_coordinates(image, x, y, image_centers_area_size, offset) + + # put the image to its TSNE coordinates using numpy subarray indices + tsne_plot[tl_y:br_y, tl_x:br_x, :] = image + + plot_legend(colors_per_class) + + cv2.imshow('T-SNE', tsne_plot) + cv2.waitKey() + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument('--path', type=str, default='data/raw-img') + parser.add_argument('--batch', type=int, default=64) + parser.add_argument('--num_images', type=int, default=500) + args = parser.parse_args() + + fix_random_seeds() + + model_outputs, labels, image_paths = get_model_outputs( + dataset=args.path, + batch=args.batch, + num_images=args.num_images + ) + + tsne = TSNE(n_components=2).fit_transform(model_outputs) + + visualize_tsne_plot(tsne, image_paths, labels) + +if __name__ == '__main__': + main() From 06dc68ae8a8bfbccf3fa072203ffee6d5a1fdcd1 Mon Sep 17 00:00:00 2001 From: Anna Petrovicheva Date: Mon, 6 Apr 2020 02:00:11 +0300 Subject: [PATCH 2/5] Minor corrections --- TSNE/animals_dataset.py | 22 ++++--- TSNE/resnet.py | 4 +- TSNE/tsne.py | 131 +++++++++++++++++++--------------------- 3 files changed, 77 insertions(+), 80 deletions(-) diff --git a/TSNE/animals_dataset.py b/TSNE/animals_dataset.py index 358703a57..2a6605e95 100644 --- a/TSNE/animals_dataset.py +++ b/TSNE/animals_dataset.py @@ -8,16 +8,16 @@ colors_per_class = { - 'dog' : (254, 202, 87), - 'horse' : (255, 107, 107), - 'elephant' : (10, 189, 227), - 'butterfly' : (255, 159, 243), - 'chicken' : (16, 172, 132), - 'cat' : (52, 31, 151), - 'cow' : (0, 210, 211), - 'sheep' : (84, 160, 255), - 'spider' : (87, 101, 116), - 'squirrel' : (200, 214, 229), + 'dog' : [254, 202, 87], + 'horse' : [255, 107, 107], + 'elephant' : [10, 189, 227], + 'butterfly' : [255, 159, 243], + 'chicken' : [16, 172, 132], + 'cat' : [128, 80, 128], + 'cow' : [87, 101, 116], + 'sheep' : [52, 31, 151], + 'spider' : [0, 0, 0], + 'squirrel' : [100, 100, 255], } @@ -55,6 +55,8 @@ def __init__(self, data_path, num_images=1000): num_images = min(num_images, len(self.data)) self.data = random.sample(self.data, num_images) # only use num_images images + # We use the transforms described in official PyTorch ResNet inference example: + # https://pytorch.org/hub/pytorch_vision_resnet/. self.transform = transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), diff --git a/TSNE/resnet.py b/TSNE/resnet.py index ab54cffaf..dbdcebe3a 100644 --- a/TSNE/resnet.py +++ b/TSNE/resnet.py @@ -6,9 +6,9 @@ # Define the architecture by modifying resnet. # Original code is here # https://github.com/pytorch/vision/blob/b2e95657cd5f389e3973212ba7ddbdcc751a7878/torchvision/models/resnet.py -class Resnet(models.ResNet): +class ResNet101(models.ResNet): def __init__(self, num_classes=1000, pretrained=True, **kwargs): - # Start with standard resnet50 defined here + # Start with standard resnet101 defined here # https://github.com/pytorch/vision/blob/b2e95657cd5f389e3973212ba7ddbdcc751a7878/torchvision/models/resnet.py super().__init__(block=models.resnet.Bottleneck, layers=[3, 4, 23, 3], num_classes=num_classes, **kwargs) if pretrained: diff --git a/TSNE/tsne.py b/TSNE/tsne.py index 8980f5ac2..5aec05f9c 100644 --- a/TSNE/tsne.py +++ b/TSNE/tsne.py @@ -5,19 +5,20 @@ import random import numpy as np from sklearn.manifold import TSNE +import matplotlib.pyplot as plt from animals_dataset import AnimalsDataset, collate_skip_empty, colors_per_class -from resnet import Resnet +from resnet import ResNet101 def fix_random_seeds(): - seed = 42 + seed = 10 random.seed(seed) torch.manual_seed(seed) np.random.seed(seed) -def get_model_outputs(dataset, batch, num_images): +def get_features(dataset, batch, num_images): # move the input and model to GPU for speed if available if torch.cuda.is_available(): device = 'cuda' @@ -25,7 +26,7 @@ def get_model_outputs(dataset, batch, num_images): device = 'cpu' # initialize our implementation of ResNet - model = Resnet(pretrained=True) + model = ResNet101(pretrained=True) model.eval() model.to(device) @@ -34,7 +35,7 @@ def get_model_outputs(dataset, batch, num_images): dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch, collate_fn=collate_skip_empty, shuffle=True) # we'll store the features as NumPy array of size num_images x feature_size - outputs = None + features = None # we'll also store the image labels and paths to visualize them later labels = [] @@ -48,56 +49,13 @@ def get_model_outputs(dataset, batch, num_images): with torch.no_grad(): output = model.forward(images) - current_outputs = output.cpu().numpy() - if outputs is not None: - outputs = np.concatenate((outputs, current_outputs)) + current_features = output.cpu().numpy() + if features is not None: + features = np.concatenate((features, current_features)) else: - outputs = current_outputs + features = current_features - return outputs, labels, image_paths - - -def plot_legend(colors_per_class): - width = 300 - - offset = 30 - color_size = 30 - color_offset = 5 - - row_height = color_size + 2 * color_offset - num_colors = len(colors_per_class) - - height = row_height * num_colors - - legend = np.zeros((height, width, 3), np.uint8) - legend.fill(255) - - for i, label in enumerate(sorted(colors_per_class)): - color = colors_per_class[label] - - tl_x = offset + 1 - tl_y = row_height * i + color_offset - - br_x = tl_x + color_size - br_y = tl_y + color_size - - legend = cv2.rectangle( - legend, - (tl_x, tl_y), - (br_x, br_y), - color=color, - thickness=cv2.FILLED - ) - legend = cv2.putText( - legend, - label, - (br_x + offset, br_y - 2 * color_offset), - fontFace=cv2.FONT_HERSHEY_TRIPLEX, - fontScale=0.5, - color=(0, 0, 0) - ) - - cv2.imshow('legend', legend) + return features, labels, image_paths def scale_to_01_range(x): @@ -132,7 +90,10 @@ def compute_plot_coordinates(image, x, y, image_centers_area_size, offset): # compute the image center coordinates on the plot center_x = int(image_centers_area_size * x) + offset - center_y = int(image_centers_area_size * y) + offset + + # in matplotlib, the y axis is directed upward + # to have the same here, we need to mirror the y coordinate + center_y = int(image_centers_area_size * (1 - y)) + offset # knowing the image center, compute the coordinates of the top left and bottom right corner tl_x = center_x - int(image_width / 2) @@ -144,15 +105,7 @@ def compute_plot_coordinates(image, x, y, image_centers_area_size, offset): return tl_x, tl_y, br_x, br_y -def visualize_tsne_plot(tsne, images, labels, plot_size=1000, max_image_size=100): - # extract x and y coordinates representing the positions of the images on T-SNE plot - tx = tsne[:, 0] - ty = tsne[:, 1] - - # scale and move the coordinates so they fit [0; 1] range - tx = scale_to_01_range(tx) - ty = scale_to_01_range(ty) - +def visualize_tsne_images(tx, ty, images, labels, plot_size=1000, max_image_size=100): # we'll put the image centers in the central area of the plot # and use offsets to make sure the images fit the plot offset = max_image_size // 2 @@ -181,12 +134,54 @@ def visualize_tsne_plot(tsne, images, labels, plot_size=1000, max_image_size=100 # put the image to its TSNE coordinates using numpy subarray indices tsne_plot[tl_y:br_y, tl_x:br_x, :] = image - plot_legend(colors_per_class) - cv2.imshow('T-SNE', tsne_plot) cv2.waitKey() +def visualize_tsne_points(tx, ty, labels): + # initialize matplotlib plot + fig = plt.figure() + ax = fig.add_subplot(111) + + # for every class, we'll add a scatter plot separately + for label in colors_per_class: + # find the samples of the current class in the data + indices = [i for i, l in enumerate(labels) if l == label] + + # extract the coordinates of the points of this class only + current_tx = np.take(tx, indices) + current_ty = np.take(ty, indices) + + # convert the class color to matplotlib format: + # BGR -> RGB, divide by 255, convert to np.array + color = np.array([colors_per_class[label][::-1]], dtype=np.float) / 255 + + # add a scatter plot with te correponding color and label + ax.scatter(current_tx, current_ty, c=color, label=label) + + # build a legend using the labels we set previously + ax.legend(loc='best') + + # finally, show the plot + plt.show() + + +def visualize_tsne(tsne, images, labels, plot_size=1000, max_image_size=100): + # extract x and y coordinates representing the positions of the images on T-SNE plot + tx = tsne[:, 0] + ty = tsne[:, 1] + + # scale and move the coordinates so they fit [0; 1] range + tx = scale_to_01_range(tx) + ty = scale_to_01_range(ty) + + # visualize the plot: samples as colored points + visualize_tsne_points(tx, ty, labels) + + # visualize the plot: samples as images + visualize_tsne_images(tx, ty, images, labels, plot_size=1000, max_image_size=50) + + def main(): parser = argparse.ArgumentParser() @@ -197,15 +192,15 @@ def main(): fix_random_seeds() - model_outputs, labels, image_paths = get_model_outputs( + features, labels, image_paths = get_features( dataset=args.path, batch=args.batch, num_images=args.num_images ) - tsne = TSNE(n_components=2).fit_transform(model_outputs) + tsne = TSNE(n_components=2).fit_transform(features) - visualize_tsne_plot(tsne, image_paths, labels) + visualize_tsne(tsne, image_paths, labels) if __name__ == '__main__': main() From c7e27b6e5ea6b150763ed6114dcc2592f883e8a1 Mon Sep 17 00:00:00 2001 From: Anna Petrovicheva Date: Tue, 7 Apr 2020 15:42:07 +0300 Subject: [PATCH 3/5] Cleanup --- TSNE/tsne.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/TSNE/tsne.py b/TSNE/tsne.py index 5aec05f9c..d1345fe1a 100644 --- a/TSNE/tsne.py +++ b/TSNE/tsne.py @@ -58,9 +58,16 @@ def get_features(dataset, batch, num_images): return features, labels, image_paths +# scale and move the coordinates so they fit [0; 1] range def scale_to_01_range(x): + # compute the distribution range value_range = (np.max(x) - np.min(x)) + + # move the distribution so that it starts from zero + # by extracting the minimal value from all its values starts_from_zero = x - np.min(x) + + # make the distribution fit [0; 1] by dividing by its range return starts_from_zero / value_range @@ -111,8 +118,7 @@ def visualize_tsne_images(tx, ty, images, labels, plot_size=1000, max_image_size offset = max_image_size // 2 image_centers_area_size = plot_size - 2 * offset - tsne_plot = np.zeros((plot_size, plot_size, 3), np.uint8) - tsne_plot.fill(255) + tsne_plot = 255 * np.zeros((plot_size, plot_size, 3), np.uint8) # now we'll put a small copy of every image to its corresponding T-SNE coordinate for image_path, label, x, y in tqdm( @@ -156,7 +162,7 @@ def visualize_tsne_points(tx, ty, labels): # BGR -> RGB, divide by 255, convert to np.array color = np.array([colors_per_class[label][::-1]], dtype=np.float) / 255 - # add a scatter plot with te correponding color and label + # add a scatter plot with the correponding color and label ax.scatter(current_tx, current_ty, c=color, label=label) # build a legend using the labels we set previously @@ -179,7 +185,7 @@ def visualize_tsne(tsne, images, labels, plot_size=1000, max_image_size=100): visualize_tsne_points(tx, ty, labels) # visualize the plot: samples as images - visualize_tsne_images(tx, ty, images, labels, plot_size=1000, max_image_size=50) + visualize_tsne_images(tx, ty, images, labels, plot_size=plot_size, max_image_size=max_image_size) def main(): From fc3de82c3446a43965c5b27516596283a194197c Mon Sep 17 00:00:00 2001 From: Anna Petrovicheva Date: Tue, 7 Apr 2020 17:10:39 +0300 Subject: [PATCH 4/5] Cleanup --- TSNE/tsne.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TSNE/tsne.py b/TSNE/tsne.py index d1345fe1a..0b72bd061 100644 --- a/TSNE/tsne.py +++ b/TSNE/tsne.py @@ -118,7 +118,7 @@ def visualize_tsne_images(tx, ty, images, labels, plot_size=1000, max_image_size offset = max_image_size // 2 image_centers_area_size = plot_size - 2 * offset - tsne_plot = 255 * np.zeros((plot_size, plot_size, 3), np.uint8) + tsne_plot = 255 * np.ones((plot_size, plot_size, 3), np.uint8) # now we'll put a small copy of every image to its corresponding T-SNE coordinate for image_path, label, x, y in tqdm( From 73e7be14fe0d3898f43590e9e0564ce0f9f06e46 Mon Sep 17 00:00:00 2001 From: Anna Petrovicheva Date: Wed, 8 Apr 2020 13:14:12 +0300 Subject: [PATCH 5/5] Added requirements --- TSNE/requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 TSNE/requirements.txt diff --git a/TSNE/requirements.txt b/TSNE/requirements.txt new file mode 100644 index 000000000..eabd6e3ec --- /dev/null +++ b/TSNE/requirements.txt @@ -0,0 +1,6 @@ +torch==1.4 +torchvision==0.5.0 +scikit-learn==0.22.2.post1 +opencv-python>=3.4.1.15 +matplotlib==3.2.1 +tqdm==4.44.1