Create a UMAP embedding of single-cell features from the EBSS starvation data
[1]:
from livecellx.sample_data import tutorial_three_image
from pathlib import Path
from livecellx.core.datasets import LiveCellImageDataset
import glob
# Directories holding the raw DIC frames and the mask images for XY16.
dataset_dir_path = Path(
    "../datasets/EBSS_Starvation/tif_STAV-A549_VIM_24hours_NoTreat_NA_YL_Ti2e_2022-12-21/XY16/"
)
mask_dataset_path = Path(
    "../datasets/EBSS_Starvation/tif_STAV-A549_VIM_24hours_NoTreat_NA_YL_Ti2e_2022-12-21/out/XY16/seg"
)
mask_dataset = LiveCellImageDataset(mask_dataset_path, ext="png")

# Sort the DIC file paths and number them 0..N-1 to build the time -> path mapping.
dic_glob_pattern = str(dataset_dir_path / "*_DIC.tif")
time2url = dict(enumerate(sorted(glob.glob(dic_glob_pattern))))
dic_dataset = LiveCellImageDataset(time2url=time2url, ext="tif")
291 png img file paths loaded:
[2]:
from livecellx.core.io_sc import prep_scs_from_mask_dataset
# Build the collection of single-cell objects from the mask and DIC datasets
# (presumably one object per labeled mask region per frame -- confirm in livecellx).
scs = prep_scs_from_mask_dataset(mask_dataset, dic_dataset)
100%|██████████| 291/291 [00:18<00:00, 15.91it/s]
[3]:
from livecellx.trajectory.feature_extractors import compute_haralick_features, compute_skimage_regionprops
from livecellx.preprocess.utils import normalize_img_to_uint8
from livecellx.core.parallel import parallelize
# One keyword-argument dict per single cell; each dict is later unpacked into
# compute_skimage_regionprops by the wrapper function.
# Direct per-cell call kept for reference:
# features = compute_skimage_regionprops(sc, preprocess_img_func=normalize_img_to_uint8, sc_level_normalize=True)
inputs = [
    {
        "sc": sc,
        "feature_key": "skimage",
        "preprocess_img_func": normalize_img_to_uint8,
        "sc_level_normalize": True,
    }
    for sc in scs
]
def compute_skimage_regionprops_wrapper(**kwargs):
    """Run ``compute_skimage_regionprops`` with ``kwargs`` and return the single cell.

    ``kwargs`` must contain the key ``"sc"``. Every keyword argument, including
    ``sc`` itself, is forwarded unchanged to ``compute_skimage_regionprops``.
    The ``sc`` object is returned instead of the extractor's own return value,
    so the caller collects the processed single cells.
    (Presumably the extractor stores its features on ``sc`` -- confirm in livecellx.)
    """
    single_cell = kwargs["sc"]
    compute_skimage_regionprops(**kwargs)
    return single_cell
# Apply the wrapper to every kwargs dict through livecellx's parallelize helper and rebind
# `scs` to its result (presumably the list of single cells returned by the wrapper -- confirm).
scs = parallelize(compute_skimage_regionprops_wrapper, inputs)
100%|██████████| 6605/6605 [04:13<00:00, 26.06it/s]
[4]:
def create_sc_table(scs, normalize_features=True):
    """Build a feature table with one row per single cell.

    Parameters
    ----------
    scs : iterable
        Objects exposing ``get_feature_pd_series()``, which must return a
        ``pandas.Series`` of features (one entry per feature name).
    normalize_features : bool, default True
        If True, every column is mean-centered and then divided by its sample
        standard deviation. Columns whose standard deviation is 0 or NaN are
        only centered, which avoids division by zero.

    Returns
    -------
    pandas.DataFrame
        The feature table without the ``"t"`` column. An empty ``scs`` yields
        an empty DataFrame instead of raising.
    """
    import numpy as np
    import pandas as pd

    feature_table = pd.DataFrame([sc.get_feature_pd_series() for sc in scs])

    # Remove column "t" before normalizing: the result is the same because each
    # column is normalized independently, and no work is spent on a discarded column.
    # errors="ignore" keeps the function usable when "t" is absent (e.g. empty input).
    feature_table = feature_table.drop(columns="t", errors="ignore")

    if normalize_features:
        for col in feature_table.columns:
            centered = feature_table[col] - feature_table[col].mean()
            col_std = centered.std()
            if col_std != 0 and not np.isnan(col_std):
                centered = centered / col_std
            feature_table[col] = centered

    # NOTE: the table is positionally indexed; to index by cell instead, one could add
    # df["sc_id"] = [sc.id for sc in scs] and df.set_index("sc_id").
    return feature_table
# Build the normalized feature table and preview its first two rows.
sc_feature_table = create_sc_table(scs)
sc_feature_table.head(2)
[4]:
| skimage_area | skimage_area_bbox | skimage_area_convex | skimage_area_filled | skimage_axis_major_length | skimage_axis_minor_length | skimage_centroid-0 | skimage_centroid-1 | skimage_centroid_local-0 | skimage_centroid_local-1 | ... | skimage_moments_weighted_normalized-2-2 | skimage_moments_weighted_normalized-2-3 | skimage_moments_weighted_normalized-3-0 | skimage_moments_weighted_normalized-3-1 | skimage_moments_weighted_normalized-3-2 | skimage_moments_weighted_normalized-3-3 | skimage_orientation | skimage_perimeter | skimage_perimeter_crofton | skimage_solidity | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.509338 | -0.462826 | -0.443450 | -0.509338 | -0.756158 | -0.328665 | -0.534969 | -0.723864 | -0.534969 | -0.723864 | ... | -0.315010 | 0.164744 | -0.037891 | -0.539973 | -0.061965 | -0.325733 | -0.913183 | -0.584333 | -0.585062 | 0.142696 |
| 1 | -0.764452 | -0.620623 | -0.647537 | -0.764452 | -1.019299 | -1.266440 | -0.952863 | -1.153251 | -0.952863 | -1.153251 | ... | -0.179977 | -0.260524 | 0.017620 | 0.034351 | -0.219412 | -0.152237 | 0.231253 | -1.029229 | -1.027540 | 0.215172 |
2 rows × 143 columns
Normalize features
Perform UMAP on features
[5]:
!pip install umap-learn
Requirement already satisfied: umap-learn in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (0.5.3)
Requirement already satisfied: scikit-learn>=0.22 in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from umap-learn) (1.1.3)
Requirement already satisfied: numpy>=1.17 in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from umap-learn) (1.23.5)
Requirement already satisfied: tqdm in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from umap-learn) (4.64.1)
Requirement already satisfied: pynndescent>=0.5 in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from umap-learn) (0.5.8)
Requirement already satisfied: scipy>=1.0 in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from umap-learn) (1.9.3)
Requirement already satisfied: numba>=0.49 in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from umap-learn) (0.56.4)
Requirement already satisfied: setuptools in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from numba>=0.49->umap-learn) (65.5.0)
Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from numba>=0.49->umap-learn) (0.39.1)
Requirement already satisfied: joblib>=0.11 in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from pynndescent>=0.5->umap-learn) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /home/ken67/anaconda3/envs/livecell/lib/python3.10/site-packages (from scikit-learn>=0.22->umap-learn) (3.1.0)
[6]:
import umap
# UMAP is stochastic; fix the seed so the embedding is reproducible across kernel restarts.
UMAP_RANDOM_STATE = 42
reducer = umap.UMAP(random_state=UMAP_RANDOM_STATE)
[7]:
# Drop feature columns whose values are all NaN (columns with only some NaNs are kept).
sc_feature_table = sc_feature_table.dropna(axis="columns", how="all")
[8]:
# Fit UMAP on the feature table and project every row (one per single cell) into the embedding space.
embedding = reducer.fit_transform(sc_feature_table)
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
[9]:
# Sanity check: expect (number of single cells, 2), since the reducer uses UMAP's default of 2 components.
embedding.shape
[9]:
(6605, 2)
[10]:
import matplotlib.pyplot as plt
# Color every embedded single cell by its timeframe; rows of `embedding` follow the order of `scs`.
timeframes = [sc.timeframe for sc in scs]
plt.scatter(
    embedding[:, 0],
    embedding[:, 1],
    c=timeframes,
    alpha=0.5,
)
# Label the figure so it can be read on its own.
plt.xlabel("UMAP 1")
plt.ylabel("UMAP 2")
plt.title("UMAP of single-cell skimage features")
# add colorbar
plt.colorbar(label="timeframe")
[10]:
<matplotlib.colorbar.Colorbar at 0x7f3b95070b80>
[11]:
# unnormalized_img_features = create_sc_table(scs, normalize_features=False)
# # drop na
# unnormalized_img_features = unnormalized_img_features.dropna(axis=1, how="all")
# reducer = umap.UMAP()
# unnormalized_embedding = reducer.fit_transform(unnormalized_img_features)
# plt.scatter(
# unnormalized_embedding[:, 0],
# unnormalized_embedding[:, 1],
# c=[sc.timeframe for sc in scs],
# )
[12]:
# list(unnormalized_img_features.keys())