-
Notifications
You must be signed in to change notification settings - Fork 2
/
fastas2dist.pl
39 lines (38 loc) · 1022 Bytes
/
fastas2dist.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
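#This script takes a list of FASTA files and calculates the pairwise genetic distance between all samples.
#Usage: perl fastas2dist.pl sample1.fasta sample2.fasta ...
#Output: one line per pair of sequences, formatted as header1|header2|distance.
#Requires each sequence to be on a single line (multi-line FASTA is not supported).
#FASTA sequences must all be the same length and must not contain multi-nucleotide IUPAC bases.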
#!/bin/perl
use strict;
use warnings;
# Entry point: compare every sequence found in the files named on the command
# line. With no arguments nothing is read and nothing is printed.
main(@ARGV);

# Print the relative distance for every unordered pair of sequences.
#
# Arguments: a list of FASTA file paths.
# Output:    one line per pair on STDOUT, "header1|header2|distance", where
#            the headers are the full ">" lines and the distance is the
#            fraction of positions at which the two sequences differ.
# Dies:      if a file cannot be read, if a sequence line has no header, or
#            if two sequences differ in length (a per-position comparison is
#            meaningless in that case).
sub main {
    my @files   = @_;
    my @records = read_fasta_records(@files);

    for my $i (0 .. $#records - 1) {
        for my $j ($i + 1 .. $#records) {
            my ($first, $second) = @records[$i, $j];

            length($first->{bases}) == length($second->{bases})
                or die "Sequences '$first->{name}' and '$second->{name}' "
                     . "differ in length; all sequences must be the same length\n";

            my $rel_dist = relative_distance($first->{bases}, $second->{bases});
            print "$first->{name}|$second->{name}|$rel_dist\n";
        }
    }
    return;
}

# Read single-line FASTA records from the given files, in order.
#
# Arguments: a list of file paths.
# Returns:   a list of hash references { name => header line, bases => sequence }.
# Dies:      if a file cannot be opened or closed, or if a sequence line is
#            not preceded by its own header line.
sub read_fasta_records {
    my @files = @_;
    my @records;
    my $header;    # most recent header that has not yet been given a sequence

    for my $file (@files) {
        open my $fh, '<', $file or die "Cannot open '$file': $!\n";

        while (my $line = <$fh>) {
            $line =~ s/\r?\n\z//;          # accept both Unix and Windows line endings
            next unless $line =~ /\S/;     # blank lines are not sequences

            if ($line =~ /^>/) {
                $header = $line;
                next;
            }

            defined $header
                or die "$file line $.: sequence without a header line "
                     . "(multi-line FASTA is not supported)\n";

            push @records, { name => $header, bases => $line };
            undef $header;
        }

        close $fh or die "Cannot close '$file': $!\n";
    }
    return @records;
}

# Fraction of positions at which two sequences differ.
#
# Arguments: two sequence strings, expected to be of equal length.
# Returns:   mismatches divided by the number of positions in the first
#            sequence, or 0 when the first sequence is empty.
sub relative_distance {
    my ($seq_a, $seq_b) = @_;

    my $positions = length $seq_a;
    return 0 if $positions == 0;           # nothing to compare; avoid dividing by zero

    # String XOR yields "\0" wherever the two characters are identical, so
    # counting the non-"\0" characters gives the number of mismatches.
    my $xor        = $seq_a ^ $seq_b;
    my $mismatches = ($xor =~ tr/\0//c);

    return $mismatches / $positions;
}