1 |
my %genetic_code = ( |
2 |
'TCA' => 'S', 'TCC' => 'S', 'TCG' => 'S', 'TCT' => 'S', |
3 |
'TTC' => 'F', 'TTT' => 'F', 'TTA' => 'L', 'TTG' => 'L', |
4 |
'TAC' => 'Y', 'TAT' => 'Y', 'TAA' => '_', 'TAG' => '_', |
5 |
'TGC' => 'C', 'TGT' => 'C', 'TGA' => '_', 'TGG' => 'W', |
6 |
'CTA' => 'L', 'CTC' => 'L', 'CTG' => 'L', 'CTT' => 'L', |
7 |
'CCA' => 'P', 'CCC' => 'P', 'CCG' => 'P', 'CCT' => 'P', |
8 |
'CAC' => 'H', 'CAT' => 'H', 'CAA' => 'Q', 'CAG' => 'Q', |
9 |
'CGA' => 'R', 'CGC' => 'R', 'CGG' => 'R', 'CGT' => 'R', |
10 |
'ATA' => 'I', 'ATC' => 'I', 'ATT' => 'I', 'ATG' => 'M', |
11 |
'ACA' => 'T', 'ACC' => 'T', 'ACG' => 'T', 'ACT' => 'T', |
12 |
'AAC' => 'N', 'AAT' => 'N', 'AAA' => 'K', 'AAG' => 'K', |
13 |
'AGC' => 'S', 'AGT' => 'S', 'AGA' => 'R', 'AGG' => 'R', |
14 |
'GTA' => 'V', 'GTC' => 'V', 'GTG' => 'V', 'GTT' => 'V', |
15 |
'GCA' => 'A', 'GCC' => 'A', 'GCG' => 'A', 'GCT' => 'A', |
16 |
'GAC' => 'D', 'GAT' => 'D', 'GAA' => 'E', 'GAG' => 'E', |
17 |
'GGA' => 'G', 'GGC' => 'G', 'GGG' => 'G', 'GGT' => 'G', |
18 |
); |
19 |
|
20 |
sub codon2aa { |
21 |
my ( $codon ) = @_; |
22 |
|
23 |
# check does mapping for codon exists |
24 |
if ( exists $genetic_code{ $codon } ) { |
25 |
# if it does, return amino acid |
26 |
return $genetic_code{ $codon }; |
27 |
} else { |
28 |
# if it doesn't exit with error |
29 |
die "bad codon: $codon"; |
30 |
} |
31 |
} |
32 |
|
33 |
sub DNA2protein { |
34 |
my ( $DNA, $offset ) = @_; |
35 |
my $protein = ''; |
36 |
|
37 |
# start at $offset and move by three places through DNA |
38 |
for ( my $i=$offset; $i<=(length($DNA)-2-$offset); $i+=3 ) { |
39 |
# extract single codon starting at position $i |
40 |
my $codon = substr( $DNA, $i, 3 ); |
41 |
# # decode codon to amino acid |
42 |
$protein .= codon2aa( $codon ); |
43 |
} |
44 |
# return created protein |
45 |
return $protein; |
46 |
} |
47 |
|
48 |
sub revcom { |
49 |
my ( $DNA ) = @_; |
50 |
my $revcom = reverse $DNA; |
51 |
$revcom =~ tr/ACGT/TGCA/; |
52 |
return $revcom; |
53 |
} |