Commit ceb7a630 authored by s.islam

Running Code: First Version

parent 4f3da7ca
 from typing import Any, List
-import random
+from numpy import random
 import h5py
 import os
@@ -25,42 +25,37 @@ comm = MPI.COMM_WORLD
 #pli_path = '/media/tushar/A2246889246861F1/Master Thesis MAIA/example-data/pli/NTransmittance'
 pli_path = '/p/fastdata/pli/Private/oberstrass1/datasets/vervet1818/vervet1818-stained/data/aligned/pli/NTransmittance'
 #cyto_path = '/media/tushar/A2246889246861F1/Master Thesis MAIA/example-data/stained'
 cyto_path = '/p/fastdata/pli/Private/oberstrass1/datasets/vervet1818/vervet1818-stained/data/aligned/stained'
 pli_files_list = [file for file in os.listdir(pli_path) if file.endswith(('.h5', '.hdf', '.h4', '.hdf4', '.he2', '.hdf5', '.he5'))]
 pli_files_list.sort()
 cyto_files_list = [file for file in os.listdir(cyto_path) if file.endswith(('.h5', '.hdf', '.h4', '.hdf4', '.he2', '.hdf5', '.he5'))]
 cyto_files_list.sort()
 # print(len(pli_files_list))
 # print(pli_files_list)
 # print(cyto_files_list)
-num_images = len(pli_files_list)
+#num_images = len(pli_files_list)
 class TestSampler(Dataset):
     # Gives you a random crop and a random image at each request
-    def __init__(self, pli_list, cyto_list, transforms, crop_size, dataset_size):
+    def __init__(self, pli_files_list, cyto_files_list, transforms, crop_size, dataset_size):
        # crop_size is the size before the rotation and center crop, i.e. patch_size * sqrt(2)
        # dataset_size defines the number of drawn patches per epoch. As we are drawing arbitrarily many random patches, we have to set it manually
        super().__init__()
        # The list of pli has to be in the same order as the list of cyto, so index i in pli corresponds to the same index in cyto
-        self.list_of_pli = pli_list
-        self.list_of_cyto = cyto_list
-        self.n_images = num_images
+        self.list_of_pli = pli_files_list
+        self.list_of_cyto = cyto_files_list
+        self.n_images = len(self.list_of_pli)
        self.transforms = transforms
        self.crop_size = crop_size
        self.dataset_size = dataset_size
     def __getitem__(self, ix):
        # Get a random image
-        i = random.randint(0, self.n_images-1)
+        i = random.randint(self.n_images)
        pli_image = self.list_of_pli[i]
        cyto_image = self.list_of_cyto[i]
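Aside, not part of the commit: a minimal sketch of the two numeric details in this hunk. The crop is taken at patch_size * sqrt(2) so that a random rotation followed by a center crop still yields fully valid pixels, and numpy's random.randint(high) samples uniformly from [0, high), which is why it replaces the stdlib call random.randint(0, n_images - 1) with inclusive bounds. patch_size = 128 is an assumed example value.

import math
from numpy import random

patch_size = 128                                  # assumed example value, not taken from the commit
crop_size = math.ceil(patch_size * math.sqrt(2))  # 182: large enough that any rotation of the crop
                                                  # still fully covers a 128 x 128 center patch
n_images = 4
i = random.randint(n_images)                      # numpy semantics: uniform over 0 .. n_images - 1 (high exclusive)
print(crop_size, i)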
@@ -73,7 +68,8 @@ class TestSampler(Dataset):
        random_crop_cyto = cyto_image[y:y + self.crop_size, x:x + self.crop_size]
        # Apply transforms on pli and cyto simultaneously
-        sample = self.transforms(pli_image=random_crop_pli, cyto_image=random_crop_cyto)
+        sample = self.transforms(image=random_crop_pli, cyto_image=random_crop_cyto)
+        sample["pli_image"] = sample.pop("image")
        return sample
     def __len__(self):
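Aside, not part of the commit: the two added lines pass the PLI crop under the key image and rename the result back afterwards. This matches the calling convention of albumentations-style pipelines, which expect a primary image argument and return it under the same key; whether the project actually uses albumentations is an assumption, so the sketch below only mimics that interface with a dummy transform.

import numpy as np

def dummy_transforms(image, cyto_image):
    # Stand-in for a transform pipeline that requires the primary array to be
    # named "image" and returns every target under its input keyword.
    return {"image": image, "cyto_image": cyto_image}

pli_crop = np.zeros((182, 182), dtype=np.float32)
cyto_crop = np.zeros((182, 182, 3), dtype=np.float32)
sample = dummy_transforms(image=pli_crop, cyto_image=cyto_crop)
sample["pli_image"] = sample.pop("image")  # restore the domain-specific key expected downstream
print(sorted(sample.keys()))               # ['cyto_image', 'pli_image']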
@@ -119,48 +115,67 @@ class TestDataModule(pl.LightningDataModule):
        size = comm.size
        # Load data from disk
-        if self.pli_train and self.cyto_train is None:
+        if self.pli_train is None or self.cyto_train is None:
            print(f"Rank {rank}/{size} preparing data")
            # TODO: Load the PLI and Cyto train data here as lists of numpy arrays: List[np.ndarray]
            # Load the pyramid/00 per file
+            #For JSC Training.
+            #pli_path = '/p/fastdata/pli/Private/oberstrass1/datasets/vervet1818/vervet1818-stained/data/aligned/pli/NTransmittance'
+            #cyto_path = '/p/fastdata/pli/Private/oberstrass1/datasets/vervet1818/vervet1818-stained/data/aligned/stained'
+            #For Local Machine Training.
+            pli_path = '/media/tushar/A2246889246861F1/Master Thesis MAIA/example-data/pli/NTransmittance'
+            cyto_path = '/media/tushar/A2246889246861F1/Master Thesis MAIA/example-data/stained'
+            pli_files_list = [file for file in os.listdir(pli_path) if
+                              file.endswith(('.h5', '.hdf', '.h4', '.hdf4', '.he2', '.hdf5', '.he5'))]
+            pli_files_list.sort()
+            cyto_files_list = [file for file in os.listdir(cyto_path) if
+                               file.endswith(('.h5', '.hdf', '.h4', '.hdf4', '.he2', '.hdf5', '.he5'))]
+            cyto_files_list.sort()
+            self.pli_train = []
+            self.cyto_train = []
-            for i in range(0,3):
+            for i in range(0,4):
                pli_train_file = h5py.File(os.path.join(pli_path, pli_files_list[i]), 'r')
                pli_train_file = pli_train_file['pyramid/00']
-                pli_train_file = np.asarray(pli_train_file)
+                pli_train_file = np.asarray(pli_train_file).astype(np.float32)
+                pli_train_file = pli_train_file - 0.5
                self.pli_train.append(pli_train_file)
-            for i in range(0,3):
-                cyto_train_file = h5py.File(os.path.join(pli_path, cyto_files_list[i]), 'r')
+            for i in range(0,4):
+                cyto_train_file = h5py.File(os.path.join(cyto_path, cyto_files_list[i]), 'r')
                cyto_train_file = cyto_train_file['pyramid/00']
-                cyto_train_file = np.asarray(cyto_train_file)
+                cyto_train_file = np.asarray(cyto_train_file).astype(np.float32)
+                cyto_train_file = (cyto_train_file/255) - 0.5
                self.cyto_train.append(cyto_train_file)
        else:
            print(f"Train data for rank {rank}/{size} already prepared")
-        if self.pli_val and self.cyto_val is None:
+        if self.pli_val is None or self.cyto_val is None:
            print(f"Rank {rank}/{size} preparing data")
            # TODO: Load the PLI and Cyto val data here as lists of numpy arrays: List[np.ndarray]
            # This should contain only unseen images
            # Load the pyramid/00 per file
-            pli_val = []
-            cyto_val = []
+            self.pli_val = []
+            self.cyto_val = []
            pli_val_file = h5py.File(os.path.join(pli_path, pli_files_list[4]), 'r')
            pli_val_file = pli_val_file['pyramid/00']
-            pli_val_file = np.asarray(pli_val_file)
-            pli_val.append(pli_val_file)
+            pli_val_file = np.asarray(pli_val_file).astype(np.float32)
+            pli_val_file = pli_val_file - 0.5
+            self.pli_val.append(pli_val_file)
-            cyto_val_file = h5py.File(os.path.join(pli_path, cyto_files_list[4]), 'r')
+            cyto_val_file = h5py.File(os.path.join(cyto_path, cyto_files_list[4]), 'r')
            cyto_val_file = cyto_val_file['pyramid/00']
-            cyto_val_file = np.asarray(cyto_val_file)
-            cyto_val.append(cyto_val_file)
+            cyto_val_file = np.asarray(cyto_val_file).astype(np.float32)
+            cyto_val_file = (cyto_val_file/255) - 0.5
+            self.cyto_val.append(cyto_val_file)
        else:
            print(f"Validation data for rank {rank}/{size} already prepared")