supervision with ordinal labels

[ ]:

[1]:
#pip install pacmap KDEpy
[2]:
import numpy as np
import scanpy as sc
import pacmap
import stream2 as st2
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
/home/jo/anaconda3/envs/stream2/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

Load data

We directly load preprocessed data from Schiebinger 2019 subsampled to ~10% of points with geosketch

Uncomment the cell below if you wish to replicate preprocessing yourself

[3]:
adata = sc.read('../data/reprogramming/wot.h5ad')
[4]:
## download data from https://broadinstitute.github.io/wot/tutorial/
#import wot
#import pandas as pd

#PATH = '../../git/wot/notebooks/data/'
#
##---Path to input files
#FLE_COORDS_PATH ='data/fle_coords.txt'
#FULL_DS_PATH = 'data/ExprMatrix.h5ad'
#VAR_DS_PATH = 'data/ExprMatrix.var.genes.h5ad'
#CELL_DAYS_PATH = 'data/cell_days.txt'
#GENE_SETS_PATH = 'data/gene_sets.gmx'
#GENE_SET_SCORES_PATH = 'data/gene_set_scores.csv'
#CELL_SETS_PATH = 'data/cell_sets.gmt'
#MAJOR_CELL_SETS_PATH = 'data/major_cell_sets.gmt'
#SERUM_CELL_IDS_PATH = 'data/serum_cell_ids.txt'
#CELL_GROWTH_PATH = 'data/growth_gs_init.txt'
#
#coord_df = pd.read_csv(PATH+FLE_COORDS_PATH, index_col='id', sep='\t')
#days_df = pd.read_csv(PATH+CELL_DAYS_PATH, index_col='id', sep='\t')
#gene_set_df = pd.read_csv(PATH+GENE_SET_SCORES_PATH, index_col='id').rename(columns={'Trophoblast':'Trophoblast.identity'})
#cell_sets = wot.io.read_sets(PATH+CELL_SETS_PATH)
#serum_id = pd.read_csv(PATH+SERUM_CELL_IDS_PATH,header=None)
#adata = sc.read_h5ad(PATH+VAR_DS_PATH)
#
## subset labeled days
#adata.obs['label']=days_df
#adata=adata[~adata.obs['label'].isna()].copy()
#sc.pp.pca(adata)
#
## add annotations
#df=cell_sets.to_df()
#df['cell_sets'] = ''
#for c in df.columns[:-1]:
#    df.loc[df[c]>0,'cell_sets']=c
#adata.obs=adata.obs.join(df)
#adata.obs.loc[adata.obs['cell_sets'].isna(),'cell_sets']='nan'
#adata.obs['cell_sets']=adata.obs['cell_sets'].astype(str)
#
#adata.obs['cell_sets_simple']=adata.obs['cell_sets'].copy()
#adata.obs['cell_sets_simple'][adata.obs['cell_sets'].isin(['Trophoblast','SpongioTropho','ProgenitorTropho','SpiralArteryTrophoGiant'])] = 'Trophoblast'
#adata.obs['cell_sets_simple'][adata.obs['cell_sets'].isin(['Neural','RadialGlia','Neuron','Astrocyte'])] = 'Neural'
#adata.uns['cell_sets_simple_color'] = {
# 'Epithelial': '#EC8E26',
# 'IPS': '#c0c1c0',
# 'MET': '#9e50c7',
# 'Neural': '#47e026',
# 'OPC': '#7f7f7f',
# 'Stromal': '#3f4af4',
# 'Trophoblast': '#b5361a',
# 'nan': '#434342'}
#
#adata.obs['serum']='0'
#adata.obs['serum'][adata.obs.index.isin(serum_id[0].values)]='1'
#
#from geosketch import gs
#sketch_index = gs(adata.obsm['X_pca'], N=25000, replace=False,seed=0)
#s_adata = adata[sketch_index]
#s_adata.write('../data/wot.h5ad')

### compute cytotrace scores using pyrovelocity implementation
#import sys
#sys.path.append('../../pyrovelocity/pyrovelocity/')
#import cytotrace
#import sklearn
#
#PATH = '../../wot/notebooks/data/'
#FULL_DS_PATH = 'data/ExprMatrix.h5ad'
#adata_full = sc.read_h5ad(PATH+FULL_DS_PATH)
#s_adata = sc.read('../data/reprogramming/wot.h5ad')
#s_adata_full = adata_full[s_adata.obs_names].copy()
#ss_adata_full = sc.pp.subsample(s_adata_full,n_obs=5000,copy=True)
#cyto = cytotrace.cytotrace(ss_adata_full,layer=None)
#
#knnreg = sklearn.neighbors.KNeighborsRegressor(weights='distance').fit(ss_adata_full.X,cyto['CytoTRACE'])
#s_adata.obs['CytoTRACE'] = knnreg.predict(s_adata_full.X)

Supervised dimension reduction

In this dataset cells are already quite well ordered by time. Still there are discrepancies where earlier cells are sampled near the tip of the distribution, after cells from later days and vice versa

If needed we can correct the few outlier ordinal labels by voting

[5]:
k = 30

knn_label_vote = adata.obs['label'].copy()
dis, idx = NearestNeighbors(n_neighbors=k,n_jobs=30).fit(adata.obsm['X_pca']).kneighbors()

for i, x in enumerate(np.array(adata.obs['label'])[idx]):
    unique, count = np.unique(x, return_counts=1)
    argmax = np.argmax(count)
    if count[argmax] > k/2:
        knn_label_vote[i] = unique[argmax]

adata.obs['knn_label_vote'] = knn_label_vote

We perform unsupervised dimensionality reduction as baseline

[6]:
if "X_pacmap" not in adata.obsm:
    adata.obsm['X_pacmap'] = pacmap.PaCMAP(n_neighbors=k, distance='angular',random_state=0, FP_ratio=.4).fit_transform(adata.obsm['X_pca'])
sc.pl.embedding(adata,basis='X_pacmap',color=['label','cell_sets_simple','serum','CytoTRACE'],ncols=2)
/home/jo/anaconda3/envs/stream2/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
_images/supervision_ordinal_11_1.png

There is usually a decision to be made which notion of time we care more about respecting: pseudotime (e.g., following data density or the number of expressed genes) vs wall time. Heuristic tools such as CytoTRACE (https://cytotrace.stanford.edu/) and psupertime (https://github.com/wmacnair/psupertime) can help to define pseudotime and find a consensus with wall time labels.

In case we decide to more closely follow wall time, a simple solution is to perform supervised dimensionality reduction by using ordinal labels and choosing a supervision strength.

[7]:
if "X_pacmap_sup" not in adata.obsm:
    knn_dists,knn_idx = st2.tl.ordinal_knn(adata,obsm='X_pca',ordinal_label='label', method='force',
                                              n_neighbors = k, n_natural = 2, metric = 'cosine')


    scaled_dist = np.ones_like(knn_idx,dtype='float32')
    pair_neighbors = pacmap.sample_neighbors_pair(adata.obsm['X_pca'], scaled_dist, knn_idx, k)
    adata.obsm['X_pacmap_sup'] = pacmap.PaCMAP(n_neighbors=k,random_state=0,FP_ratio=.4,
                                               pair_neighbors=pair_neighbors).fit_transform(adata.obsm['X_pca'])
sc.pl.embedding(adata,basis='X_pacmap_sup',color=['label','cell_sets_simple','serum','CytoTRACE'],ncols=2)
_images/supervision_ordinal_13_0.png

Trajectory inference

Let us seed the graph with a minimum spanning tree. We can see that 10 clusters are not enough

[8]:
adata.obsm['X_dr']=adata.obsm['X_pacmap_sup']
st2.tl.seed_graph(adata,n_clusters=10,use_weights=False)
st2.pl.graph(adata,key='seed_epg',color=['label'],show_node=1)
Seeding initial graph...
Clustering...
K-Means clustering ...
Calculating minimum spanning tree...
_images/supervision_ordinal_15_1.png

A simple solution is to increase the number of clusters

[9]:
st2.tl.seed_graph(adata,n_clusters=50,use_weights=False)
st2.pl.graph(adata,key='seed_epg',color=['label'],show_node=1)
Seeding initial graph...
Clustering...
K-Means clustering ...
Calculating minimum spanning tree...
_images/supervision_ordinal_17_1.png

The seed graph is still not ideal, e.g. for IPS cells because the start of the branch is very sparse.

This can be improved by computing density and assigning higher weights to points in sparse regions of data

[10]:
st2.tl.get_weights(adata,bandwidth=.5,method='fft')
sc.pl.embedding(adata,basis='X_dr',color='pointweights')

st2.tl.seed_graph(adata,n_clusters=50,use_weights=True)
st2.pl.graph(adata,key='seed_epg',color=['label'],show_node=1)
_images/supervision_ordinal_19_0.png
Seeding initial graph...
Clustering...
K-Means clustering ...
Calculating minimum spanning tree...
_images/supervision_ordinal_19_2.png

We now refine the guess by computing an unsupervised principal graph

We can make use of GPU acceleration with the cupy library; if you don’t have one set GPU=False

[11]:
st2.tl.learn_graph(adata,n_nodes=100,epg_alpha=0.02,epg_mu=0.1,epg_lambda=0.01,
                   use_weights=True, GPU=False, store_evolution=True,
                   #max_candidates={'AddNode2Node': 15, 'BisectEdge': 10, 'ShrinkEdge':10},
                   verbose=1,)
st2.pl.graph(adata,key='epg',color=['label','cell_sets_simple'],alpha=.2,size=2)
Constructing tree 1 of 1 / Subset 1 of 1
Computing EPG with  100  nodes on  25000  points and  2  dimensions

BARCODE ENERGY  NNODES  NEDGES  NRIBS   NSTARS  NRAYS   NRAYS2  MSE     MSEP    FVE     FVEP    UE      UR      URN     URN2    URSD

7||51   0.5845  51      50      35      7       0       0       0.2413  0.1981  0.9929  0.9942  0.3223  0.0209  1.0646  54.2967 0
6||52   0.5588  52      51      38      6       0       0       0.2399  0.2001  0.9929  0.9941  0.3064  0.0125  0.6525  33.9303 0
5||53   0.5745  53      52      41      5       0       0       0.2497  0.2118  0.9926  0.9938  0.295   0.0298  1.578   83.6334 0
5||54   0.5526  54      53      42      5       0       0       0.2425  0.2077  0.9929  0.9939  0.2835  0.0266  1.4389  77.7016 0
5||55   0.5486  55      54      43      5       0       0       0.2458  0.212   0.9928  0.9938  0.2731  0.0297  1.6312  89.7157 0
5||56   0.5396  56      55      44      5       0       0       0.25    0.2178  0.9926  0.9936  0.2593  0.0303  1.6951  94.9246 0
4||57   0.5259  57      56      47      4       0       0       0.2494  0.2188  0.9927  0.9936  0.2493  0.0273  1.5565  88.7177 0
4||58   0.5159  58      57      48      4       0       0       0.2456  0.2167  0.9928  0.9936  0.2446  0.0258  1.4959  86.7612 0
4||59   0.5059  59      58      49      4       0       0       0.2364  0.2067  0.993   0.9939  0.2445  0.025   1.4779  87.1947 0
4||60   0.504   60      59      50      4       0       0       0.233   0.2042  0.9931  0.994   0.2456  0.0254  1.5242  91.4533 0
3||61   0.4946  61      60      53      3       0       0       0.2281  0.2002  0.9933  0.9941  0.2437  0.0228  1.3935  85.0025 0
3||62   0.4906  62      61      54      3       0       0       0.2282  0.2004  0.9933  0.9941  0.2408  0.0216  1.342   83.2028 0
3||63   0.4836  63      62      55      3       0       0       0.2274  0.2009  0.9933  0.9941  0.2361  0.0201  1.2674  79.8435 0
3||64   0.4784  64      63      56      3       0       0       0.2251  0.2001  0.9934  0.9941  0.2332  0.0201  1.2884  82.4596 0
3||65   0.4695  65      64      57      3       0       0       0.2189  0.1951  0.9936  0.9943  0.2303  0.0202  1.3161  85.5468 0
3||66   0.4647  66      65      58      3       0       0       0.2178  0.1951  0.9936  0.9943  0.2254  0.0215  1.4165  93.4906 0
3||67   0.4592  67      66      59      3       0       0       0.2157  0.1932  0.9936  0.9943  0.2226  0.0209  1.3994  93.762  0
3||68   0.4559  68      67      60      3       0       0       0.2141  0.1918  0.9937  0.9944  0.2213  0.0205  1.3927  94.7014 0
3||69   0.4501  69      68      61      3       0       0       0.2119  0.19    0.9938  0.9944  0.2192  0.019   1.3121  90.5346 0
3||70   0.4451  70      69      62      3       0       0       0.2096  0.1884  0.9938  0.9945  0.2163  0.0192  1.3435  94.0419 0
3||71   0.442   71      70      63      3       0       0       0.208   0.1867  0.9939  0.9945  0.2142  0.0197  1.4021  99.5515 0
3||72   0.4355  72      71      64      3       0       0       0.203   0.1828  0.994   0.9946  0.214   0.0185  1.3288  95.6731 0
3||73   0.4339  73      72      65      3       0       0       0.2035  0.1831  0.994   0.9946  0.2103  0.0201  1.4665  107.0509        0
3||74   0.4279  74      73      66      3       0       0       0.1996  0.18    0.9941  0.9947  0.209   0.0193  1.4271  105.6087        0
3||75   0.4242  75      74      67      3       0       0       0.1987  0.1793  0.9941  0.9947  0.2067  0.0188  1.4063  105.4736        0
3||76   0.4182  76      75      68      3       0       0       0.1941  0.175   0.9943  0.9948  0.2054  0.0187  1.4208  107.9839        0
3||77   0.413   77      76      69      3       0       0       0.1894  0.171   0.9944  0.995   0.2041  0.0195  1.4993  115.4427        0
3||78   0.4113  78      77      70      3       0       0       0.1902  0.1721  0.9944  0.9949  0.2015  0.0196  1.5296  119.307 0
3||79   0.4081  79      78      71      3       0       0       0.1896  0.1717  0.9944  0.9949  0.1991  0.0194  1.5312  120.9646        0
3||80   0.404   80      79      72      3       0       0       0.1874  0.1703  0.9945  0.995   0.197   0.0196  1.5717  125.7345        0
3||81   0.4011  81      80      73      3       0       0       0.1871  0.1707  0.9945  0.995   0.1947  0.0193  1.5664  126.8775        0
3||82   0.3964  82      81      74      3       0       0       0.184   0.1675  0.9946  0.9951  0.1942  0.0182  1.4938  122.492 0
3||83   0.3925  83      82      75      3       0       0       0.1816  0.1659  0.9947  0.9951  0.1919  0.019   1.5747  130.7019        0
3||84   0.3875  84      83      76      3       0       0       0.1765  0.1614  0.9948  0.9952  0.1917  0.0194  1.6265  136.6294        0
3||85   0.3866  85      84      77      3       0       0       0.1786  0.1629  0.9947  0.9952  0.1898  0.0182  1.5454  131.3622        0
3||86   0.3817  86      85      78      3       0       0       0.1744  0.1593  0.9949  0.9953  0.1882  0.0191  1.64    141.0405        0
3||87   0.3807  87      86      79      3       0       0       0.176   0.1608  0.9948  0.9953  0.1866  0.0181  1.5748  137.0036        0
3||88   0.3775  88      87      80      3       0       0       0.1746  0.1599  0.9949  0.9953  0.1845  0.0184  1.6178  142.3653        0
3||89   0.3744  89      88      81      3       0       0       0.1728  0.1585  0.9949  0.9953  0.1832  0.0184  1.6407  146.0203        0
3||90   0.3687  90      89      82      3       0       0       0.1664  0.1525  0.9951  0.9955  0.1838  0.0185  1.6659  149.9304        0
3||91   0.366   91      90      83      3       0       0       0.1665  0.1531  0.9951  0.9955  0.1813  0.0182  1.6587  150.9389        0
3||92   0.3652  92      91      84      3       0       0       0.1677  0.1545  0.9951  0.9955  0.1794  0.0181  1.6624  152.9364        0
3||93   0.3626  93      92      85      3       0       0       0.1682  0.1551  0.995   0.9954  0.1769  0.0175  1.6253  151.1492        0
3||94   0.3597  94      93      86      3       0       0       0.1667  0.1539  0.9951  0.9955  0.1756  0.0174  1.634   153.5971        0
3||95   0.3563  95      94      87      3       0       0       0.1644  0.1515  0.9952  0.9955  0.1741  0.0178  1.692   160.7378        0
3||96   0.3518  96      95      88      3       0       0       0.1609  0.1484  0.9953  0.9956  0.1731  0.0177  1.7028  163.4646        0
3||97   0.3509  97      96      89      3       0       0       0.1607  0.1484  0.9953  0.9956  0.172   0.0182  1.7628  170.9886        0
3||98   0.3486  98      97      90      3       0       0       0.1612  0.1494  0.9953  0.9956  0.1698  0.0175  1.7184  168.4026        0
3||99   0.3453  99      98      91      3       0       0       0.1581  0.1464  0.9953  0.9957  0.1691  0.0181  1.7913  177.3353        0
3||100  0.3437  100     99      92      3       0       0       0.1584  0.1467  0.9953  0.9957  0.1679  0.0174  1.7444  174.4388        0
504.8498  seconds elapsed
_images/supervision_ordinal_21_1.png

Since we used store_evolution=True we have access to graphs for any number of nodes between n_clusters of the seed graph and n_nodes

[12]:
st2.tl.use_graph_with_n_nodes(adata,n_nodes=80)
st2.pl.graph(adata,key='epg',color=['label'])
_images/supervision_ordinal_23_0.png

While the structure is better than the seed graph, the result is suboptimal and some branches do not follow ordinal labels. We can compute a supervised principal graph to get better results Ordinal labels should be smoothed into a continuous variable before learning the supervised graph (here they are already densely sampled so we won’t need this - we just discretize them into 3 bins as an example)

[12]:
#define a root point
root_idx = 0
time = adata.obs['label']
bins = np.histogram(time, bins=3)[1]
adata.obs['clusters'] = np.digitize(time, bins[1:], right=True)

st2.tl.smooth_ordinal_labels(adata, root_idx, ordinal_label='clusters', obsm="X_pacmap_sup")
sc.pl.embedding(adata,'X_pacmap_sup',color=['clusters','ps'])
_images/supervision_ordinal_25_0.png

We then need to define 3 extra variables: root cell, ordinal labels to follow and supervision strength - ordinal_supervision_strength = 1 means equal importance to respecting ordinal labels vs natural data order, - ordinal_supervision_strength = 10 means following ordinal labels is 10 times more important

[13]:
adata2=adata.copy()
st2.tl.learn_graph(
    adata2,
    n_nodes=100,epg_alpha=0.02,epg_mu=0.1,epg_lambda=0.01,
    GPU=False,store_evolution=True,use_weights=True,use_seed=True,
    max_candidates={'AddNode2Node': 15, 'BisectEdge': 10, 'ShrinkEdge':10},verbose=1,

    # supervision parameters
    ordinal_label = 'knn_label_vote',                            #define the ordinal vector to follow
    ordinal_root_point = root_idx,                               #define a root point (e.g., cell with lowest ordinal value)
    ordinal_supervision_strength = 1,                            #define strength of supervision in [0, +inf).
)

st2.pl.graph(adata2,key='epg',color=['label','cell_sets_simple'],alpha=.2,size=2)
#st2.pl.graph(adata2,key='epg',color=['label','cell_sets_simple'],alpha=.2,size=2,
#            fig_path=f'figures/reprogramming/shareseq_stream_umap.png',save_fig=True)
Constructing tree 1 of 1 / Subset 1 of 1
The elastic matrix is being used. Edge configuration will be ignored
Computing EPG with  100  nodes on  25000  points and  2  dimensions

BARCODE ENERGY  NNODES  NEDGES  NRIBS   NSTARS  NRAYS   NRAYS2  MSE     MSEP    FVE     FVEP    UE      UR      URN     URN2    URSD

6||52   0.5399  52      51      38      6       0       0       0.2372  0.1969  0.993   0.9942  0.2922  0.0105  0.5463  28.4089 0
6||53   0.5101  53      52      39      6       0       0       0.2192  0.1814  0.9935  0.9947  0.2812  0.0097  0.5153  27.3126 0
5||54   0.4987  54      53      42      5       0       0       0.2145  0.1766  0.9937  0.9948  0.2735  0.0107  0.5777  31.1957 0
5||55   0.4846  55      54      43      5       0       0       0.2083  0.1728  0.9939  0.9949  0.2663  0.01    0.5492  30.2048 0
5||56   0.4768  56      55      44      5       0       0       0.2086  0.1752  0.9939  0.9948  0.2578  0.0105  0.5854  32.7803 0
4||57   0.4747  57      56      47      4       0       0       0.2199  0.1873  0.9935  0.9945  0.2465  0.0082  0.4667  26.5997 0
4||58   0.4691  58      57      48      4       0       0       0.2143  0.1813  0.9937  0.9947  0.2474  0.0075  0.4364  25.3112 0
4||59   0.4658  59      58      49      4       0       0       0.2121  0.1794  0.9938  0.9947  0.246   0.0078  0.4583  27.0391 0
4||60   0.4576  60      59      50      4       0       0       0.214   0.1825  0.9937  0.9946  0.236   0.0076  0.4558  27.3464 0
4||61   0.446   61      60      51      4       0       0       0.2086  0.1793  0.9939  0.9947  0.2298  0.0075  0.4593  28.0157 0
3||62   0.4378  62      61      54      3       0       0       0.2094  0.182   0.9938  0.9946  0.2203  0.008   0.4959  30.7479 0
3||63   0.4314  63      62      55      3       0       0       0.2099  0.1845  0.9938  0.9946  0.2136  0.0079  0.5     31.4999 0
3||64   0.4273  64      63      56      3       0       0       0.2075  0.183   0.9939  0.9946  0.2115  0.0083  0.5293  33.8736 0
3||65   0.4213  65      64      57      3       0       0       0.2066  0.1828  0.9939  0.9946  0.2068  0.0079  0.5109  33.2085 0
3||66   0.4193  66      65      58      3       0       0       0.2036  0.1797  0.994   0.9947  0.2075  0.0082  0.5399  35.6353 0
3||67   0.4132  67      66      59      3       0       0       0.2002  0.1775  0.9941  0.9948  0.205   0.008   0.5379  36.0367 0
3||68   0.4129  68      67      60      3       0       0       0.1989  0.1762  0.9941  0.9948  0.2062  0.0078  0.5316  36.1516 0
3||69   0.4078  69      68      61      3       0       0       0.1977  0.176   0.9942  0.9948  0.2025  0.0076  0.5267  36.344  0
3||70   0.4049  70      69      62      3       0       0       0.1959  0.1745  0.9942  0.9949  0.2013  0.0076  0.5335  37.3476 0
3||71   0.4029  71      70      63      3       0       0       0.1953  0.174   0.9942  0.9949  0.2002  0.0075  0.5302  37.6442 0
3||72   0.3977  72      71      64      3       0       0       0.1926  0.1718  0.9943  0.9949  0.1979  0.0072  0.5198  37.4246 0
3||73   0.3939  73      72      65      3       0       0       0.1915  0.1712  0.9944  0.995   0.1952  0.0073  0.5296  38.6616 0
3||74   0.3909  74      73      66      3       0       0       0.1911  0.1712  0.9944  0.995   0.1927  0.0072  0.5305  39.255  0
3||75   0.3856  75      74      67      3       0       0       0.1908  0.1719  0.9944  0.9949  0.1877  0.0071  0.535   40.1281 0
3||76   0.3834  76      75      68      3       0       0       0.1889  0.1703  0.9944  0.995   0.1871  0.0074  0.5648  42.9214 0
3||77   0.3804  77      76      69      3       0       0       0.1882  0.1702  0.9945  0.995   0.1845  0.0077  0.5937  45.7116 0
3||78   0.379   78      77      70      3       0       0       0.1872  0.1694  0.9945  0.995   0.1837  0.0081  0.6323  49.317  0
3||79   0.3758  79      78      71      3       0       0       0.1863  0.1692  0.9945  0.995   0.182   0.0075  0.5893  46.5518 0
3||80   0.373   80      79      72      3       0       0       0.1838  0.1669  0.9946  0.9951  0.1814  0.0078  0.6238  49.9013 0
3||81   0.3714  81      80      73      3       0       0       0.1854  0.1688  0.9945  0.995   0.1785  0.0076  0.6147  49.7869 0
3||82   0.3676  82      81      74      3       0       0       0.1828  0.167   0.9946  0.9951  0.1773  0.0075  0.6111  50.1088 0
3||83   0.3659  83      82      75      3       0       0       0.1807  0.1648  0.9947  0.9951  0.1782  0.007   0.5834  48.4181 0
3||84   0.3636  84      83      76      3       0       0       0.1789  0.1637  0.9947  0.9952  0.1775  0.0071  0.6004  50.4372 0
3||85   0.3615  85      84      77      3       0       0       0.1745  0.1589  0.9949  0.9953  0.1797  0.0073  0.6225  52.9125 0
3||86   0.3559  86      85      78      3       0       0       0.1709  0.1557  0.995   0.9954  0.1769  0.0082  0.7028  60.4418 0
3||87   0.35    87      86      79      3       0       0       0.1667  0.1518  0.9951  0.9955  0.175   0.0084  0.7336  63.8214 0
3||88   0.352   88      87      80      3       0       0       0.1659  0.1512  0.9951  0.9955  0.1764  0.0097  0.8505  74.8418 0
3||89   0.3494  89      88      81      3       0       0       0.1649  0.1502  0.9951  0.9956  0.1747  0.0098  0.8724  77.6478 0
3||90   0.3446  90      89      82      3       0       0       0.1625  0.1483  0.9952  0.9956  0.1726  0.0095  0.8573  77.1608 0
3||91   0.338   91      90      83      3       0       0       0.1578  0.144   0.9954  0.9958  0.1721  0.0081  0.734   66.7928 0
3||92   0.3358  92      91      84      3       0       0       0.1584  0.1449  0.9953  0.9957  0.1696  0.0078  0.7171  65.9768 0
3||93   0.333   93      92      85      3       0       0       0.154   0.1404  0.9955  0.9959  0.1711  0.0079  0.7343  68.2859 0
3||94   0.3319  94      93      86      3       0       0       0.1532  0.1398  0.9955  0.9959  0.171   0.0077  0.7223  67.8992 0
3||95   0.3289  95      94      87      3       0       0       0.151   0.1379  0.9956  0.9959  0.1704  0.0075  0.7119  67.6336 0
3||96   0.3249  96      95      88      3       0       0       0.1503  0.1375  0.9956  0.996   0.167   0.0076  0.7295  70.0339 0
3||97   0.3208  97      96      89      3       0       0       0.1484  0.1361  0.9956  0.996   0.1647  0.0076  0.7391  71.6936 0
3||98   0.3178  98      97      90      3       0       0       0.1463  0.1342  0.9957  0.996   0.1638  0.0077  0.7564  74.1267 0
3||99   0.3168  99      98      91      3       0       0       0.1456  0.1335  0.9957  0.9961  0.1634  0.0078  0.773   76.5269 0
3||100  0.3145  100     99      92      3       0       0       0.1445  0.1327  0.9957  0.9961  0.1622  0.0078  0.783   78.2977 0
649.5394  seconds elapsed
_images/supervision_ordinal_27_1.png

For best results we infer the graph without the max candidates heuristic and fine tune nodes and supervision strength parameters

[14]:
adata2=adata.copy()
st2.tl.learn_graph(
    adata2,
    n_nodes=110,epg_alpha=0.02,epg_mu=0.1,epg_lambda=0.01,
    GPU=False,store_evolution=True,use_weights=True,use_seed=True,verbose=1,

    # supervision parameters
    ordinal_label = 'knn_label_vote',                            #define the ordinal vector to follow
    ordinal_root_point = root_idx,                               #define a root point (e.g., cell with lowest ordinal value)
    ordinal_supervision_strength = 2.5,                            #define strength of supervision in [0, +inf).
)

st2.pl.graph(adata2,key='epg',color=['label','cell_sets_simple'],alpha=.2,size=2)
#st2.pl.graph(adata2,key='epg',color=['label','cell_sets_simple'],alpha=.2,size=2,
#            fig_path=f'figures/reprogramming/shareseq_stream_umap.png',save_fig=True)
Constructing tree 1 of 1 / Subset 1 of 1
The elastic matrix is being used. Edge configuration will be ignored
Computing EPG with  110  nodes on  25000  points and  2  dimensions

BARCODE ENERGY  NNODES  NEDGES  NRIBS   NSTARS  NRAYS   NRAYS2  MSE     MSEP    FVE     FVEP    UE      UR      URN     URN2    URSD

6||52   0.5412  52      51      38      6       0       0       0.2401  0.199   0.9929  0.9941  0.2913  0.0098  0.5105  26.5441 0
6||53   0.519   53      52      39      6       0       0       0.223   0.1834  0.9934  0.9946  0.2858  0.0102  0.538   28.5148 0
5||54   0.497   54      53      42      5       0       0       0.2158  0.1771  0.9936  0.9948  0.271   0.0102  0.5486  29.6251 0
5||55   0.4798  55      54      43      5       0       0       0.2043  0.1674  0.994   0.9951  0.2663  0.0091  0.5032  27.6746 0
5||56   0.4665  56      55      44      5       0       0       0.1998  0.1666  0.9941  0.9951  0.2571  0.0096  0.5382  30.1379 0
4||57   0.4697  57      56      47      4       0       0       0.215   0.1813  0.9937  0.9947  0.2481  0.0066  0.3762  21.4451 0
4||58   0.4649  58      57      48      4       0       0       0.2112  0.1781  0.9938  0.9948  0.247   0.0066  0.3853  22.3459 0
4||59   0.4512  59      58      49      4       0       0       0.2119  0.1816  0.9938  0.9947  0.2328  0.0065  0.3856  22.7532 0
3||60   0.4411  60      59      52      3       0       0       0.2133  0.1849  0.9937  0.9946  0.2208  0.0069  0.4167  25.0011 0
3||61   0.4379  61      60      53      3       0       0       0.2097  0.1824  0.9938  0.9946  0.2212  0.007   0.4263  26.0067 0
3||62   0.4335  62      61      54      3       0       0       0.2082  0.1813  0.9939  0.9947  0.2186  0.0068  0.4224  26.1884 0
3||63   0.4281  63      62      55      3       0       0       0.2062  0.1802  0.9939  0.9947  0.2153  0.0065  0.4118  25.9435 0
3||64   0.425   64      63      56      3       0       0       0.2047  0.1786  0.994   0.9947  0.2142  0.0061  0.3881  24.8405 0
3||65   0.4185  65      64      57      3       0       0       0.2044  0.1798  0.994   0.9947  0.2078  0.0062  0.4006  26.0378 0
3||66   0.4161  66      65      58      3       0       0       0.2015  0.1775  0.9941  0.9948  0.2086  0.006   0.396   26.1382 0
3||67   0.4135  67      66      59      3       0       0       0.2007  0.1771  0.9941  0.9948  0.2066  0.0062  0.4129  27.6627 0
3||68   0.4115  68      67      60      3       0       0       0.199   0.1763  0.9941  0.9948  0.206   0.0065  0.4402  29.9334 0
3||69   0.4085  69      68      61      3       0       0       0.1962  0.1739  0.9942  0.9949  0.2063  0.0059  0.4103  28.3074 0
3||70   0.4054  70      69      62      3       0       0       0.1941  0.1721  0.9943  0.9949  0.2054  0.0059  0.4147  29.0308 0
3||71   0.4002  71      70      63      3       0       0       0.1945  0.1732  0.9943  0.9949  0.1995  0.0063  0.4446  31.5657 0
3||72   0.3967  72      71      64      3       0       0       0.1932  0.1725  0.9943  0.9949  0.1975  0.0061  0.441   31.7502 0
3||73   0.3942  73      72      65      3       0       0       0.192   0.1715  0.9943  0.995   0.1963  0.0059  0.432   31.5345 0
3||74   0.3878  74      73      66      3       0       0       0.192   0.1723  0.9943  0.9949  0.1898  0.006   0.4439  32.8482 0
3||75   0.3856  75      74      67      3       0       0       0.1909  0.1719  0.9944  0.9949  0.1888  0.0059  0.4424  33.1787 0
3||76   0.3828  76      75      68      3       0       0       0.1904  0.1716  0.9944  0.9949  0.1869  0.0056  0.4235  32.1886 0
3||77   0.3812  77      76      69      3       0       0       0.189   0.1706  0.9944  0.995   0.1867  0.0056  0.4288  33.0208 0
3||78   0.3755  78      77      70      3       0       0       0.1913  0.1742  0.9944  0.9949  0.1785  0.0057  0.4429  34.5424 0
3||79   0.3715  79      78      71      3       0       0       0.1896  0.1734  0.9944  0.9949  0.1764  0.0055  0.4356  34.4134 0
3||80   0.3707  80      79      72      3       0       0       0.188   0.1717  0.9945  0.9949  0.1773  0.0054  0.4305  34.4403 0
3||81   0.3695  81      80      73      3       0       0       0.1853  0.1694  0.9945  0.995   0.1787  0.0054  0.4414  35.7557 0
3||82   0.3684  82      81      74      3       0       0       0.1832  0.1675  0.9946  0.9951  0.1799  0.0052  0.429   35.1775 0
3||83   0.3629  83      82      75      3       0       0       0.1845  0.1692  0.9946  0.995   0.1734  0.005   0.414   34.3588 0
3||84   0.3602  84      83      76      3       0       0       0.1828  0.1676  0.9946  0.9951  0.1725  0.0048  0.4054  34.0569 0
3||85   0.3584  85      84      77      3       0       0       0.1814  0.1664  0.9947  0.9951  0.1721  0.0049  0.42    35.6965 0
3||86   0.3573  86      85      78      3       0       0       0.1809  0.1659  0.9947  0.9951  0.1716  0.0049  0.4198  36.1012 0
3||87   0.3538  87      86      79      3       0       0       0.1815  0.1671  0.9947  0.9951  0.1675  0.0048  0.4205  36.5866 0
3||88   0.3521  88      87      80      3       0       0       0.1802  0.1659  0.9947  0.9951  0.1672  0.0046  0.4049  35.634  0
3||89   0.3505  89      88      81      3       0       0       0.181   0.1672  0.9947  0.9951  0.1646  0.0049  0.4398  39.1449 0
3||90   0.3462  90      89      82      3       0       0       0.1797  0.1662  0.9947  0.9951  0.1616  0.0049  0.4365  39.2885 0
3||91   0.3445  91      90      83      3       0       0       0.178   0.1645  0.9948  0.9952  0.1617  0.0048  0.4362  39.6951 0
3||92   0.3428  92      91      84      3       0       0       0.1792  0.1663  0.9947  0.9951  0.1588  0.0047  0.4367  40.1778 0
3||93   0.3398  93      92      85      3       0       0       0.1771  0.1648  0.9948  0.9951  0.1581  0.0046  0.4281  39.8142 0
3||94   0.3403  94      93      86      3       0       0       0.1765  0.1645  0.9948  0.9952  0.1578  0.006   0.5667  53.2701 0
3||95   0.3382  95      94      87      3       0       0       0.1756  0.1637  0.9948  0.9952  0.1565  0.0061  0.5751  54.6366 0
3||96   0.337   96      95      88      3       0       0       0.175   0.1632  0.9948  0.9952  0.156   0.006   0.5772  55.4137 0
3||97   0.3352  97      96      89      3       0       0       0.1738  0.1618  0.9949  0.9952  0.1558  0.0056  0.5465  53.013  0
3||98   0.3346  98      97      90      3       0       0       0.1699  0.1579  0.995   0.9953  0.1584  0.0063  0.6125  60.0286 0
3||99   0.3276  99      98      91      3       0       0       0.1609  0.1489  0.9953  0.9956  0.1619  0.0048  0.4729  46.8199 0
3||100  0.3248  100     99      92      3       0       0       0.1546  0.1423  0.9954  0.9958  0.1655  0.0046  0.4598  45.9756 0
3||101  0.3179  101     100     93      3       0       0       0.1512  0.1396  0.9955  0.9959  0.1619  0.0047  0.4787  48.35   0
3||102  0.3111  102     101     94      3       0       0       0.1446  0.1328  0.9957  0.9961  0.1619  0.0047  0.4748  48.4303 0
3||103  0.3094  103     102     95      3       0       0       0.1459  0.1344  0.9957  0.996   0.1588  0.0047  0.4801  49.4511 0
3||104  0.3052  104     103     96      3       0       0       0.1431  0.1319  0.9958  0.9961  0.1573  0.0048  0.4973  51.7183 0
3||105  0.3013  105     104     97      3       0       0       0.138   0.1265  0.9959  0.9963  0.1592  0.0041  0.4306  45.2159 0
3||106  0.3005  106     105     98      3       0       0       0.1352  0.1238  0.996   0.9964  0.1597  0.0056  0.5914  62.6914 0
3||107  0.2992  107     106     99      3       0       0       0.136   0.1248  0.996   0.9963  0.158   0.0052  0.5604  59.9578 0
3||108  0.296   108     107     100     3       0       0       0.1333  0.1224  0.9961  0.9964  0.1575  0.0053  0.5733  61.9193 0
3||109  0.2947  109     108     101     3       0       0       0.1318  0.121   0.9961  0.9964  0.1577  0.0053  0.5813  63.3669 0
3||110  0.2904  110     109     102     3       0       0       0.1299  0.1192  0.9962  0.9965  0.1566  0.004   0.4389  48.281  0
1431.2766  seconds elapsed
_images/supervision_ordinal_29_1.png
[15]:
st2.tl.use_graph_with_n_nodes(adata2,n_nodes=110)
st2.pl.graph(adata2,key='epg',color=['label'])
_images/supervision_ordinal_30_0.png

Pseudotime and trajectory plots

[25]:
st2.tl.infer_pseudotime(adata2,0)
#jitter to prevent bug for equal pseudotime values
adata2.obs['label_plot']=adata2.obs['label']+1e-20*np.random.random(len(adata2))

st2.pl.stream_sc(adata2,source=0,preference=[],dist_scale=.5,color=['cell_sets_simple'],fig_size=(10,5))

#show
st2.pl.stream(adata2,source=0,preference=[],factor_min_win=2.5,color=['cell_sets_simple'],fig_size=(10,5),
             fig_path=f'figures/reprogramming/shareseq_stream_cell_sets_simple',)
st2.pl.stream(adata2,source=0,preference=[],factor_min_win=2.5,color=['label_plot'],fig_size=(10,5),
             fig_path=f'figures/reprogramming/shareseq_stream_timepoints',)

#save png
st2.pl.stream(adata2,source=0,preference=[],factor_min_win=2.5,color=['cell_sets_simple'],fig_size=(10,5),
             fig_path=f'figures/reprogramming/shareseq_stream_cell_sets_simple',save_fig=True,fig_format='png')
st2.pl.stream(adata2,source=0,preference=[],factor_min_win=2.5,color=['label_plot'],fig_size=(10,5),
             fig_path=f'figures/reprogramming/shareseq_stream_timepoints',save_fig=True,fig_format='png')

#save pdf
st2.pl.stream(adata2,source=0,preference=[],factor_min_win=2.5,color=['cell_sets_simple'],fig_size=(10,5),
             fig_path=f'figures/reprogramming/shareseq_stream_cell_sets_simple',save_fig=True,fig_format='pdf')
st2.pl.stream(adata2,source=0,preference=[],factor_min_win=2.5,color=['label_plot'],fig_size=(10,5),
             fig_path=f'figures/reprogramming/shareseq_stream_timepoints',save_fig=True,fig_format='pdf')
_images/supervision_ordinal_32_0.png
_images/supervision_ordinal_32_1.png
_images/supervision_ordinal_32_2.png
[33]:
f,axs=plt.subplots(1,2,figsize=(15,6))

adata2.obs['cell_sets_simple_plot'] = adata2.obs['cell_sets_simple'].replace('nan','undefined')
annot=['cell_sets_simple_plot','label',]
annot_name=['Cell types','Days',]

nodep=adata2.uns['epg']['node_pos']
edges=adata2.uns['epg']['edge']

adata2.obsm['X_umap']=adata2.obsm['X_pacmap_sup']
for i,key in enumerate(annot):
    ax=axs[i]
    if i==1:
        ax=sc.pl.umap(adata2,color=key,cmap='viridis',ax=ax,show=False)
        for e in edges:
            ax.plot(*nodep[e].T,linewidth=3,color='k')
    else:
        ax=sc.pl.umap(adata2,color=key,ax=ax,show=False)

    ax.set_title(annot_name[i],fontsize=35)
    ax.set_xlabel('PACMAP1',fontsize=25)
    ax.set_ylabel('PACMAP2',fontsize=25)
    if i!=1:
        #ax.legend(prop=dict(size=18),bbox_to_anchor=[1.2, 0.5], loc='center')
        for lh in ax.get_legend().legendHandles:
            lh._sizes = [100]

plt.tight_layout(pad=0,w_pad=1)
plt.savefig(f'figures/reprogramming/pacmap.png',dpi=300,bbox_inches='tight')
plt.savefig(f'figures/reprogramming/pacmap.pdf',dpi=300,bbox_inches='tight')
plt.show()
/home/jo/anaconda3/envs/stream2/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/tmp/ipykernel_3306/3070442868.py:28: MatplotlibDeprecationWarning: The legendHandles attribute was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use legend_handles instead.
  for lh in ax.get_legend().legendHandles:
_images/supervision_ordinal_33_1.png

Save results

[ ]:
#these keys might fail to write
#del adata2.uns['epg']['conn']
#del adata2.uns['seed_epg']['conn']
#del adata2.uns['epg']['graph_evolution']
#del adata2.uns['stream_tree']
#adata2.write('../data/test.h5ad')
[ ]:

[ ]: