-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathfcs_functions.py
More file actions
56 lines (45 loc) · 1.87 KB
/
fcs_functions.py
File metadata and controls
56 lines (45 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from __future__ import annotations
import random
import anndata as ad
import numpy as np
from flowsom.io import read_FCS
from flowsom.tl import get_markers
def aggregate_flowframes(files, c_total, channels=None, keep_order=False):
    """Aggregate multiple FCS files into a single AnnData object.

    Every file contributes at most ``ceil(c_total / len(files))`` randomly
    sampled cells. The aggregate can therefore hold slightly more than
    ``c_total`` cells when the division is not exact, and fewer when some
    files contain less than their share.

    Three columns are added to ``.obs`` of the result (all stored as float32):

    * ``Original_ID``: row index of the cell within its source file
    * ``File``: position of the source file in ``files``
    * ``File_scattered``: ``File`` plus Gaussian noise (sd 0.1)

    :param files: An array/list containing full paths to the FCS files or fcs
        files as anndata objects. Entries that are not ``AnnData`` instances
        are loaded with ``read_FCS``.
    :type files: np.array
    :param c_total: Total number of cells to write to the output file
    :type c_total: int
    :param channels: Channels/markers to keep in the aggregate, resolved per
        file through ``get_markers``. Default None keeps every channel.
    :type channels: np.array
    :param keep_order: If True, the random subsample will be ordered in the same
        way as they were originally ordered in the file. Default=False.
    :type keep_order: boolean
    :return: The sampled cells of all files, concatenated with an outer join
        on the variables; ``.uns`` is taken from the first file that has it.
    :rtype: anndata.AnnData
    :raises ValueError: If ``files`` is empty.
    """
    n_files = len(files)
    if n_files == 0:
        # Fail early with a clear message instead of an incidental
        # ZeroDivisionError from the per-file quota computation below.
        raise ValueError("files must contain at least one FCS file or AnnData object")

    # Per-file quota, rounded up so that the quotas add up to at least c_total.
    cells_per_file = int(np.ceil(c_total / n_files))

    sampled = []
    for file_index, entry in enumerate(files):
        adata = entry if isinstance(entry, ad.AnnData) else read_FCS(entry)
        if channels is not None:
            adata = adata[:, list(get_markers(adata, channels).keys())]

        n_take = min(adata.n_obs, cells_per_file)

        # Random sampling without replacement.
        # NOTE: this draws from the stdlib ``random`` generator while the
        # jitter below draws from ``np.random``; both must be seeded to get a
        # reproducible aggregate.
        ids = random.sample(range(adata.n_obs), n_take)
        if keep_order:
            ids = sorted(ids)

        # Slicing an AnnData returns a view; take an explicit copy so that the
        # .obs assignments below write to an object we own rather than relying
        # on anndata's implicit copy-on-write (which emits a warning).
        subset = adata[ids, :].copy()

        file_ids = np.repeat(file_index, n_take)
        jitter = np.random.normal(loc=0.0, scale=0.1, size=n_take)
        subset.obs["Original_ID"] = np.array(ids, dtype=np.float32)
        subset.obs["File"] = np.array(file_ids, dtype=np.float32)
        subset.obs["File_scattered"] = np.array(file_ids + jitter, dtype=np.float32)
        sampled.append(subset)

    return ad.concat(sampled, join="outer", uns_merge="first")