-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
162 lines (145 loc) · 3.99 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
package main
import (
"errors"
"math"
"os"
"regexp"
"github.com/evolbioinfo/gotree/io/newick"
"github.com/evolbioinfo/gotree/tree"
"github.com/spf13/cobra"
)
type tipbag map[*tree.Node]bool
var flagsTree string
var flagsRegex string
func init() {
mainCmd.Flags().StringVarP(&flagsTree, "tree", "t", "", "tree file to read (in Newick format)")
mainCmd.Flags().StringVarP(&flagsRegex, "regex", "r", "", "regex of tip names to parse")
}
var mainCmd = &cobra.Command{
Use: "",
Short: "",
Long: ``,
RunE: func(cmd *cobra.Command, args []string) error {
err := mrca(flagsTree, flagsRegex)
return err
},
}
func main() {
err := mainCmd.Execute()
if err != nil {
os.Exit(1)
}
}
// mrca is the top-level function aside from main
func mrca(treeFile, re string) error {
// Open the file containing the tree
f, err := os.Open(treeFile)
if err != nil {
return err
}
defer f.Close()
// Parse it into a gotree Tree (t)
t, err := newick.NewParser(f).Parse()
if err != nil {
return err
}
// The input tree needs to be rooted for the routines below to work
if !t.Rooted() {
return errors.New("input tree is not rooted")
}
// tb is a tipbag which contains all the tips whose names match the
// regular expression provided on the command line
tb, err := relevantTips(t, re)
if err != nil {
return err
}
if len(tb) == 0 {
return errors.New("no tips found matching regex")
}
// get the most recent common ancestor of the relevant tips
mrca := getMRCA(t, tb)
if len(mrca.Name()) > 0 {
os.Stdout.WriteString(mrca.Name() + "\n")
} else if mrca == t.Root() {
os.Stdout.WriteString("root\n")
} else {
return errors.New("MRCA node has no name")
}
return nil
}
// Given a (gotree) tree and the string representation of a regular expression,
// return a tipbag containing all the tips that match the regexp.
func relevantTips(t *tree.Tree, re string) (tipbag, error) {
tb := make(map[*tree.Node]bool)
for _, t := range t.Tips() {
m, err := regexp.MatchString(re, t.Name())
if err != nil {
return tb, err
}
if m {
tb[t] = true
}
}
return tb, nil
}
// getMRCA does all the work given the tree and the bag of relevant tips.
// It populations a map of each interior node: its child tips (as long as
// they contain all of the tips we want in tb), and finally returns the node
// with the fewest number of child tips overall: this is the MRCA of the
// tipbag.
func getMRCA(t *tree.Tree, tb tipbag) *tree.Node {
// an empty map to populate with candidate MRCAs
childMap := make(map[*tree.Node][]*tree.Node)
// starting at the root, traverse every node (recursively) and add it to
// childMap if all its child tips are a superset of the tips in tb
childTips(t.Root(), nil, tb, childMap)
// populate a final map from node -> number of child tips
finalMap := make(map[*tree.Node]int)
for parent, children := range childMap {
finalMap[parent] = len(children)
}
// the MRCA is the node with the fewest number of children overall
min := math.MaxInt
var mrca *tree.Node
for parent, nchildren := range finalMap {
if nchildren < min {
mrca = parent
min = nchildren
}
}
return mrca
}
// (Recursively) check each node in the tree for whether its descendant nodes contain all
// the relevant tips, and add them to a map if they do
func childTips(cur, prev *tree.Node, tb tipbag, m map[*tree.Node][]*tree.Node) []*tree.Node {
children := make([]*tree.Node, 0)
for _, neighbour := range cur.Neigh() {
if neighbour != prev {
if neighbour.Tip() {
children = append(children, neighbour)
} else {
ch := childTips(neighbour, cur, tb, m)
children = append(children, ch...)
}
}
}
if isSubset(tb, children) {
m[cur] = children
}
return children
}
// isSubset returns true/false the tips in the tipbag are a subset
// of the tips in others
func isSubset(relevant tipbag, others []*tree.Node) bool {
o := make(map[*tree.Node]bool)
for _, node := range others {
o[node] = true
}
for node, _ := range relevant {
_, ok := o[node]
if !ok {
return false
}
}
return true
}