summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Cully <bjc@kublai.com>2022-07-01 15:51:44 -0400
committerBrian Cully <bjc@kublai.com>2022-07-01 15:51:44 -0400
commit630d01364bdb9128fed07a5efa8f3be8091b50f5 (patch)
tree2e3bcf3eb00940428a43f26cce9913ce9ccc3d7f
parent9bdab828813b71dd10f8f016c09af95ae145d090 (diff)
downloadfav-630d01364bdb9128fed07a5efa8f3be8091b50f5.tar.gz
fav-630d01364bdb9128fed07a5efa8f3be8091b50f5.zip
Add script for printing a protein's codon sequence.
-rwxr-xr-xcodon2aa.pl70
1 files changed, 70 insertions, 0 deletions
diff --git a/codon2aa.pl b/codon2aa.pl
new file mode 100755
index 0000000..a9b9e8d
--- /dev/null
+++ b/codon2aa.pl
@@ -0,0 +1,70 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use Data::Dumper;
+use IO::File;
+
+my $CODON_TO_AA_FILENAME = 'codon-amino-acid.csv';
+my $AA_TO_LETTER_FILENAME = 'amino-acid-letters.csv';
+
+&usage unless $#ARGV == 1;
+
+my %codon2aa = &csv2kvp($CODON_TO_AA_FILENAME);
+my %aa2letter = &csv2kvp($AA_TO_LETTER_FILENAME);
+
+my $genomefh = IO::File->new($ARGV[0])
+ or die "Couldn't open $ARGV[0] for reading: $!\n";
+my $genome = <$genomefh>;
+chomp $genome;
+
+my $sequences = IO::File->new($ARGV[1])
+ or die "Couldn't open $ARGV[1] for reading: $!\n";
+$_ = <$sequences>;
+while (<$sequences>) {
+ chomp;
+ my ($name, $start, $end) = split /,/;
+
+ # Lengths are inclusive, so add 1 to them.
+ my $protein_len = $end - $start + 1;
+ unless ($protein_len % 3 == 0) {
+ print STDERR "Protein $name has improper length $protein_len.\n";
+ next;
+ }
+
+ my $aaseq = '';
+ # Decrement $start to change from the input's 1-indexed array to
+ # perl's 0-indexed.
+ for ($start--; $start < $end; $start += 3) {
+ my $codon = substr $genome, $start, 3;
+ my $aa = $aa2letter{$codon2aa{$codon}};
+ $aaseq .= $aa;
+ }
+ print "$name,$aaseq\n";
+}
+
+sub usage {
+ print STDERR "Usage: $0 genome protein-sequence.csv\n";
+ exit 1;
+}
+
+sub csv2kvp {
+ my ($filename) = @_;
+
+ my $fh = IO::File->new($filename)
+ or die "Couldn't open $filename for reading: $!\n";
+
+ # Drop the header line since we're returning a direct map of keys
+ # to values.
+ $_ = <$fh>;
+
+ my %rc = ();
+ while (<$fh>) {
+ chomp;
+ my ($k, $v) = split /,/;
+ $rc{$k} = $v;
+ }
+
+ %rc
+}