import sys
import anndata as ad
import cellmapper as cm
from scipy.sparse import csc_matrix

## VIASH START
# Note: this section is auto-generated by viash at runtime. To edit it, make changes
# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
par = {
    'input_train_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/train_mod1.h5ad',
    'input_train_mod2': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/train_mod2.h5ad',
    'input_test_mod1': 'resources_test/task_predict_modality/openproblems_neurips2021/bmmc_multiome/swap/test_mod1.h5ad',
    'output': 'output.h5ad',
    'n_neighbors': 30,
    'kernel_method': 'hnoca',
    'use_hvg': False,
    'adt_normalization': 'clr',  # Normalization method for ADT data
    'plot_umap': True,
}
meta = {
    'name': 'cellmapper_scvi',
    'resources_dir': 'target/executable/methods/cellmapper_scvi',
}
## VIASH END

# Make the component's resources directory importable so that the local
# `utils` module (providing `get_representation`) can be found.
sys.path.append(meta['resources_dir'])
from utils import get_representation

# --- Load the three input AnnData files ------------------------------------
print('Reading input files', flush=True)
train_mod1 = ad.read_h5ad(par['input_train_mod1'])
train_mod2 = ad.read_h5ad(par['input_train_mod2'])
test_mod1 = ad.read_h5ad(par['input_test_mod1'])

# The modality names are stored by the datasets themselves in `.uns`.
mod1 = train_mod1.uns['modality']
mod2 = train_mod2.uns['modality']
print(f"Modality 1: {mod1}, n_features: {train_mod1.n_vars}", flush=True)
print(f"Modality 2: {mod2}, n_features: {train_mod2.n_vars}", flush=True)

# --- Joint latent representation of train + test (modality 1) --------------
# Train and test cells are stacked into one object; the "split" obs column
# records which input each row came from so they can be separated again.
print("Concatenating train and test data", flush=True)
combined = ad.concat(
    [train_mod1, test_mod1],
    merge="same",
    label="split",
    keys=["train", "test"],
)

# `get_representation` is expected to return the object with an "X_scvi"
# entry in `.obsm` (that key is read immediately below).
print("Get latent representation", flush=True)
combined = get_representation(
    adata=combined,
    modality=mod1,
    use_hvg=par['use_hvg'],
    adt_normalization=par['adt_normalization'],
    plot_umap=par['plot_umap'],
)

# Hand the embedding rows back to the per-split objects.
is_train = combined.obs["split"] == "train"
is_test = combined.obs["split"] == "test"
train_mod1.obsm["X_scvi"] = combined[is_train].obsm["X_scvi"].copy()
test_mod1.obsm["X_scvi"] = combined[is_test].obsm["X_scvi"].copy()

# Attach the normalized modality-2 values to the reference cells so they can
# be transferred to the query cells through `.obsm`.
train_mod1.obsm["mod2"] = train_mod2.layers["normalized"]

# --- Map modality 2 from reference (train) onto query (test) ---------------
print('Setup and prepare Cellmapper', flush=True)
mapper = cm.CellMapper(query=test_mod1, reference=train_mod1)
mapper.compute_neighbors(use_rep="X_scvi", n_neighbors=par['n_neighbors'])
mapper.compute_mapping_matrix(kernel_method=par['kernel_method'])

print("Predict on test data", flush=True)
mapper.map_obsm(key="mod2", prediction_postfix="pred")
# The prediction is read back from the query under "mod2_pred" and stored
# in CSC sparse format for the output layer.
predicted = csc_matrix(mapper.query.obsm["mod2_pred"])

# --- Assemble and write the prediction object ------------------------------
print("Write output AnnData to file", flush=True)
result_uns = {
    'dataset_id': train_mod1.uns['dataset_id'],
    'method_id': meta["name"],
}
result = ad.AnnData(
    layers={"normalized": predicted},
    obs=test_mod1.obs,
    var=train_mod2.var,
    uns=result_uns,
)
result.write_h5ad(par['output'], compression='gzip')