-
Notifications
You must be signed in to change notification settings - Fork 91
Expand file tree
/
Copy pathsimple_train.yml
More file actions
52 lines (52 loc) · 1.75 KB
/
simple_train.yml
File metadata and controls
52 lines (52 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
---
# Operations requested for this run.
ops:
  - train
  - evaluate
# Model definition: names a Python source file, the class to use from it,
# and the arguments given for that class.
model:
  # TODO: update this line with the absolute path to the file.
  path: <ABSOLUTE_PATH>/tutorials/getting_started_with_selene/deeperdeepsea.py
  class: DeeperDeepSEA
  class_args:
    sequence_length: 1000
    n_targets: 1
  non_strand_specific: mean
# Data sampler. The custom `!obj:` tags reference selene_sdk objects by their
# dotted import path; the mapping under each tag holds that object's
# keyword arguments.
sampler: !obj:selene_sdk.samplers.IntervalsSampler
  reference_sequence: !obj:selene_sdk.sequences.Genome
    # We include relative paths here, but we recommend using absolute
    # paths for future configuration files.
    input_path: ./male.hg19.fasta
  features: !obj:selene_sdk.utils.load_features_list
    input_path: ./distinct_features.txt
  target_path: ./sorted_GM12878_CTCF.bed.gz
  intervals_path: ./deepsea_TF_intervals.txt
  seed: 127
  # A positive example is a 1000bp sequence with at least 1 class/feature
  # annotated to it. A negative sample has no classes/features annotated
  # to the sequence.
  sample_negative: true
  sequence_length: 1000
  center_bin_to_predict: 200
  test_holdout: [chr8, chr9]
  validation_holdout: [chr6, chr7]
  # The feature must take up 50% of the bin (200bp) for it to be considered
  # a feature annotated to that sequence.
  feature_thresholds: 0.5
  mode: train
  save_datasets: [validate, test]
# Training settings, given as keyword arguments to the object referenced by
# the `!obj:selene_sdk.TrainModel` tag.
train_model: !obj:selene_sdk.TrainModel
  batch_size: 64
  max_steps: 8000  # update this value for longer training
  report_stats_every_n_steps: 1000
  n_validation_samples: 32000
  n_test_samples: 120000
  cpu_n_threads: 32
  use_cuda: true  # TODO: update this if CUDA is not on your machine
  data_parallel: false
# Run-level settings: seed, output location, and dataset-loading flags.
random_seed: 1447
output_dir: ./training_outputs
create_subdirectory: true
load_test_set: false
...