-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathRawSeq.pir
More file actions
86 lines (65 loc) · 2.83 KB
/
RawSeq.pir
File metadata and controls
86 lines (65 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#
# A structure for maintaining the information about a single
# molecular sequence that is part of a RawDiskSeqs collection
#
- PerlClass PirObject::RawSeq
- InheritsFrom PirObject
- FieldsTable
# Field name Sing/Array/Hash Type Comments
#---------------------- --------------- --------------- -----------------------
id single string Internal ID (usually "S0xNNNN" where NNN is hex)
fastaheader single string
startoffset single int4 Byte position in raw file
seqlength single int4
# The following fields are used for subregions of a genome, but not for the main genome
parentId single string
parentName single string from fastaheader of parent
parentStart single int4 Start always < stop
parentStop single int4 Stop always > start
parentStrand single string "+" or "-"
- EndFieldsTable
- Methods
our $RCS_VERSION='$Id: RawSeq.pir,v 1.4 2008/08/20 19:43:22 riouxp Exp $';
our ($VERSION) = ($RCS_VERSION =~ m#,v ([\w\.]+)#);
sub GetSubseq {
my $self = shift;
my $start = shift;
my $len = shift;
my $class = ref($self);
die "This is an instance method.\n" unless $class;
die "Error: len must be positive.\n" unless $len >= 0;
my $fh = $self->{'_tiedFh_'};
die "Object not yet tied to a file on disk?!?\n" unless $fh;
my $rawstart = $self->get_startoffset();
my $rawlen = $self->get_seqlength();
if ($start < 0 || $start >= $rawlen) {
die "Error: start position '$start' is outside of sequence boundaries 0 .. " . ($rawlen-1) . "\n";
}
if ($start+$len > $rawlen) {
die "Error: with start '$start', length '$len' is outside of sequence boundaries 0 .. " . ($rawlen-1) . "\n";
}
return "" if $len == 0;
my $realstart = $rawstart+$start;
my $subseq = "";
$fh->sysseek($realstart,0);
my $found = $fh->sysread($subseq,$len);
#print STDERR "Extracting at $realstart for $len\n";
if (!defined($found) || $found != $len) {
die "Error: could not read $len bytes from tied filehandle; got '" . (defined($found) ? $found : "undef") . "' bytes instead.\n";
}
$subseq;
}
sub GetSubseqReverse {
my $self = shift;
my $start = shift;
my $len = shift;
my $class = ref($self);
die "This is an instance method.\n" unless $class;
die "Error: len must be positive.\n" unless $len > 0;
my $seqlength = $self->get_seqlength();
my $newstart = $seqlength-1-$start-($len-1); # I know the +1 -1 cancels each other
my $subseq = $self->GetSubseq($newstart,$len);
$subseq = reverse($subseq);
$subseq =~ tr/acgtACGT/tgcaTGCA/;
$subseq;
}