-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtRNAscan-SEtoBED.py
More file actions
executable file
·70 lines (61 loc) · 2.43 KB
/
tRNAscan-SEtoBED.py
File metadata and controls
executable file
·70 lines (61 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#! /usr/bin/python3
import sys
"""Script to convert tRNAscan-SE output to a BED file
appropriate for UCSC geneome browser.
tRNAscan-SE output should be on stdin,
BED file is on stdout.
Author: Dominik Bujna
"""
tRNA_type_number = {}
for line in sys.stdin:
columns = line.split()
#check if the line contains an entry
if columns[1].isdigit():
chromosomeName = columns[0]
#start & end, smaller first, reposition to start at 0th position
if int(columns[2]) < int(columns[3]):
chromStart = int(columns[2]) - 1
chromEnd = int(columns[3])
is_plus_strand = True
else:
chromStart = int(columns[3]) - 1
chromEnd = int(columns[2])
is_plus_strand = False
#generate the name
name = "tRNA-" + columns[4] + "-" + columns[5]
if name not in tRNA_type_number.keys():
tRNA_type_number[name] = 1
else:
tRNA_type_number[name] += 1
name += str(tRNA_type_number[name])
#recalculate the score value from 0-100 to 0-1000
score = int(float(columns[8])*10)
#check if there is an intron
if int(columns[6]) == 0 and int(columns[7]) == 0:
blockCount = 1
blockStarts = "0"
blockSizes = chromEnd - chromStart
else:
#tRNAscan-SE seems to give max 1 intron, hence 2 blocks if there is one
blockCount = 2
#correct for the strand direction and start at 0
if is_plus_strand:
intronStart = int(columns[6]) - 1
exonStart = int(columns[7])
else:
intronStart = int(columns[7]) - 1
exonStart = int(columns[6])
#positions with chromStart as 0
blockStarts = "0," + str(exonStart - chromStart)
blockSizes = str(intronStart - chromStart) + "," + str(chromEnd - exonStart)
#put together the processed information
output = chromosomeName + "\t" + str(chromStart) + "\t" + str(chromEnd) + "\t" + name + "\t" + str(score) + "\t"
if is_plus_strand:
output += "+\t"
else:
output += "-\t"
#thickly drawn part: empty (both ends at start); color black
output += str(chromStart) + "\t" + str(chromStart) + "\t0,0,0\t"
#add intron/exon info
output += str(blockCount) + "\t" + str(blockSizes) + "\t" + str(blockStarts)
print(output)