From 4faf99084550cc031fda86e70c2f0e163c277504 Mon Sep 17 00:00:00 2001 From: Martin/Geno Date: Sat, 19 Jan 2019 18:45:59 +0100 Subject: [PATCH 1/3] [TASK] add no-respondd --- config_example.toml | 6 ++++++ database/graphite/global.go | 1 + database/influxdb/global.go | 13 +++++++------ database/logging/file.go | 2 +- docs/docs_configuration.md | 22 +++++++++++++++++++++ runtime/node.go | 1 + runtime/nodes.go | 11 ++++++++++- runtime/nodes_config.go | 2 ++ runtime/nodes_ping.go | 38 +++++++++++++++++++++++++++++++++++++ runtime/nodes_ping_test.go | 38 +++++++++++++++++++++++++++++++++++++ runtime/stats.go | 16 ++++++++++------ runtime/stats_test.go | 4 +++- 12 files changed, 139 insertions(+), 15 deletions(-) create mode 100644 runtime/nodes_ping.go create mode 100644 runtime/nodes_ping_test.go diff --git a/config_example.toml b/config_example.toml index a5a3b530..1eedb6f0 100644 --- a/config_example.toml +++ b/config_example.toml @@ -55,6 +55,12 @@ save_interval = "5s" # Set node to offline if not seen within this period offline_after = "10m" +## Verify whether a node is really down by pinging its last seen address +# Send x pings to verify whether the node is offline (to disable, set count < 1) +ping_count = 3 +# Timeout when pinging a node +ping_timeout = "1s" + ## [[nodes.output.example]] # Each output format has its own config block and needs to be enabled by adding: diff --git a/database/graphite/global.go b/database/graphite/global.go index 62fff662..0cd2cf68 100644 --- a/database/graphite/global.go +++ b/database/graphite/global.go @@ -36,6 +36,7 @@ func (c *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, s func GlobalStatsFields(name string, stats *runtime.GlobalStats) []graphigo.Metric { return []graphigo.Metric{ {Name: name + ".nodes", Value: stats.Nodes}, + {Name: name + ".nodes.no_respondd", Value: stats.NodesNoRespondd}, {Name: name + ".gateways", Value: stats.Gateways}, {Name: name + ".clients.total", Value: stats.Clients}, {Name: name + 
".clients.wifi", Value: stats.ClientsWifi}, diff --git a/database/influxdb/global.go b/database/influxdb/global.go index 6c4330eb..90b82b90 100644 --- a/database/influxdb/global.go +++ b/database/influxdb/global.go @@ -42,12 +42,13 @@ func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time // GlobalStatsFields returns fields for InfluxDB func GlobalStatsFields(stats *runtime.GlobalStats) map[string]interface{} { return map[string]interface{}{ - "nodes": stats.Nodes, - "gateways": stats.Gateways, - "clients.total": stats.Clients, - "clients.wifi": stats.ClientsWifi, - "clients.wifi24": stats.ClientsWifi24, - "clients.wifi5": stats.ClientsWifi5, + "nodes": stats.Nodes, + "nodes.no_respondd": stats.NodesNoRespondd, + "gateways": stats.Gateways, + "clients.total": stats.Clients, + "clients.wifi": stats.ClientsWifi, + "clients.wifi24": stats.ClientsWifi24, + "clients.wifi5": stats.ClientsWifi5, } } diff --git a/database/logging/file.go b/database/logging/file.go index c9745b0f..47686164 100644 --- a/database/logging/file.go +++ b/database/logging/file.go @@ -50,7 +50,7 @@ func (conn *Connection) InsertLink(link *runtime.Link, time time.Time) { } func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, site string, domain string) { - conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, ", clients: ", stats.Clients, " models: ", len(stats.Models)) + conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, " (no respondd: ", stats.NodesNoRespondd, "), clients: ", stats.Clients, " models: ", len(stats.Models)) } func (conn *Connection) PruneNodes(deleteAfter time.Duration) { diff --git a/docs/docs_configuration.md b/docs/docs_configuration.md index 4c5d4067..1291dd42 100644 --- a/docs/docs_configuration.md +++ b/docs/docs_configuration.md @@ -203,6 +203,8 @@ state_path = "/var/lib/yanic/state.json" prune_after = "7d" 
save_interval = "5s" offline_after = "10m" +ping_count = 3 +ping_timeout = "1s" ``` {% endmethod %} @@ -246,6 +248,26 @@ offline_after = "10m" ``` {% endmethod %} +### ping_count +{% method %} +Verify whether a node is really down by pinging its last seen address. +Send x pings to verify whether the node is offline (to disable, set count < 1). +{% sample lang="toml" %} +```toml +ping_count = 3 +``` +{% endmethod %} + + +### ping_timeout +{% method %} +Timeout when pinging a node. +{% sample lang="toml" %} +```toml +ping_timeout = "1s" +``` +{% endmethod %} + ## [[nodes.output.example]] {% method %} diff --git a/runtime/node.go b/runtime/node.go index b10be10b..cff23e38 100644 --- a/runtime/node.go +++ b/runtime/node.go @@ -13,6 +13,7 @@ type Node struct { Firstseen jsontime.Time `json:"firstseen"` Lastseen jsontime.Time `json:"lastseen"` Online bool `json:"online"` + NoRespondd bool `json:"-"` Statistics *data.Statistics `json:"statistics"` Nodeinfo *data.Nodeinfo `json:"nodeinfo"` Neighbours *data.Neighbours `json:"-"` diff --git a/runtime/nodes.go b/runtime/nodes.go index a4f0b089..8e19d2a6 100644 --- a/runtime/nodes.go +++ b/runtime/nodes.go @@ -176,7 +176,16 @@ func (nodes *Nodes) expire() { delete(nodes.List, id) } else if node.Lastseen.Before(offlineAfter) { // set to offline - node.Online = false + if nodes.config.PingCount > 0 && nodes.ping(node) { + node.Online = true + node.NoRespondd = true + + node.Statistics = nil + node.Neighbours = nil + } else { + node.Online = false + node.NoRespondd = false + } } } } diff --git a/runtime/nodes_config.go b/runtime/nodes_config.go index 6d9520ce..69dbf90f 100644 --- a/runtime/nodes_config.go +++ b/runtime/nodes_config.go @@ -7,5 +7,7 @@ type NodesConfig struct { SaveInterval duration.Duration `toml:"save_interval"` // Save nodes periodically OfflineAfter duration.Duration `toml:"offline_after"` // Set node to offline if not seen within this period PruneAfter duration.Duration `toml:"prune_after"` // Remove nodes after n days of 
inactivity + PingCount int `toml:"ping_count"` // send x pings to verify if node is offline (for disable count < 1) + PingTimeout duration.Duration `toml:"ping_timeout"` // timeout of sending ping to a node Output map[string]interface{} } diff --git a/runtime/nodes_ping.go b/runtime/nodes_ping.go new file mode 100644 index 00000000..d5f7e535 --- /dev/null +++ b/runtime/nodes_ping.go @@ -0,0 +1,38 @@ +package runtime + +import ( + "github.com/bdlm/log" + "github.com/sparrc/go-ping" +) + +func (nodes *Nodes) ping(node *Node) bool { + logNode := log.WithField("node_id", "unknown") + if node.Nodeinfo != nil { + logNode = logNode.WithField("node_id", node.Nodeinfo.NodeID) + } + if node.Address == nil { + logNode.Debug("error no address found") + return false + } + addr := node.Address.IP.String() + if node.Address.IP.IsLinkLocalUnicast() { + addr += "%" + node.Address.Zone + } + + logAddr := logNode.WithField("addr", addr) + + pinger, err := ping.NewPinger(addr) + if err != nil { + logAddr.Debugf("error during ping: %s", err) + return false + } + //pinger.SetPrivileged(true) + pinger.Count = nodes.config.PingCount + pinger.Timeout = nodes.config.PingTimeout.Duration + pinger.Run() // blocks until finished + stats := pinger.Statistics() + logAddr.WithFields(map[string]interface{}{ + "pkg_lost": stats.PacketLoss, + }).Debug("pong") + return stats.PacketLoss < 100 +} diff --git a/runtime/nodes_ping_test.go b/runtime/nodes_ping_test.go new file mode 100644 index 00000000..170f4d14 --- /dev/null +++ b/runtime/nodes_ping_test.go @@ -0,0 +1,38 @@ +package runtime + +import ( + "net" + "testing" + "time" + + "github.com/bdlm/log" + "github.com/stretchr/testify/assert" + + "github.com/FreifunkBremen/yanic/data" +) + +func TestPing(t *testing.T) { + log.SetLevel(log.DebugLevel) + + assert := assert.New(t) + config := &NodesConfig{ + PingCount: 1, + } + config.OfflineAfter.Duration = time.Minute * 10 + // to get default (100%) path of testing + // config.PruneAfter.Duration = 
time.Hour * 24 * 6 + nodes := &Nodes{ + config: config, + List: make(map[string]*Node), + ifaceToNodeID: make(map[string]string), + } + + node := nodes.Update("expire", &data.ResponseData{NodeInfo: &data.NodeInfo{NodeID: "nodeID-Lola"}}) + node.Address = &net.UDPAddr{Zone: "bat0"} + // error during ping + assert.False(nodes.ping(node)) + + node.Address.IP = net.ParseIP("fe80::1") + // error during ping + assert.False(nodes.ping(node)) +} diff --git a/runtime/stats.go b/runtime/stats.go index 145d8bb2..bdb7d292 100644 --- a/runtime/stats.go +++ b/runtime/stats.go @@ -11,12 +11,13 @@ type CounterMap map[string]uint32 // GlobalStats struct type GlobalStats struct { - Clients uint32 - ClientsWifi uint32 - ClientsWifi24 uint32 - ClientsWifi5 uint32 - Gateways uint32 - Nodes uint32 + Clients uint32 + ClientsWifi uint32 + ClientsWifi24 uint32 + ClientsWifi5 uint32 + Gateways uint32 + Nodes uint32 + NodesNoRespondd uint32 Firmwares CounterMap Models CounterMap @@ -81,6 +82,9 @@ func (s *GlobalStats) Add(node *Node) { s.ClientsWifi5 += stats.Clients.Wifi5 s.ClientsWifi += stats.Clients.Wifi } + if node.NoRespondd { + s.NodesNoRespondd++ + } if node.IsGateway() { s.Gateways++ } diff --git a/runtime/stats_test.go b/runtime/stats_test.go index d68fee07..d905b7a6 100644 --- a/runtime/stats_test.go +++ b/runtime/stats_test.go @@ -22,6 +22,7 @@ func TestGlobalStats(t *testing.T) { //check GLOBAL_SITE stats assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Gateways) assert.EqualValues(3, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Nodes) + assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].NodesNoRespondd) assert.EqualValues(25, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Clients) // check models @@ -98,7 +99,8 @@ func createTestNodes() *Nodes { nodes.AddNode(nodeData) nodes.AddNode(&Node{ - Online: true, + Online: true, + NoRespondd: true, Statistics: &data.Statistics{ Clients: data.Clients{ Total: 2, From 729527733499f6005caee8f4f27a0a774a05d4f9 Mon Sep 17 00:00:00 2001 From: Martin/Geno 
Date: Sat, 19 Jan 2019 21:59:43 +0100 Subject: [PATCH 2/3] add fallback for addresses --- INSTALL.md | 6 ++++++ docs/docs_install.md | 6 ++++++ runtime/node.go | 2 +- runtime/nodes.go | 32 +++++++++++++++++++++----------- runtime/nodes_ping.go | 23 ++++++++++++++++------- runtime/nodes_ping_test.go | 8 +++++++- 6 files changed, 57 insertions(+), 20 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 8d890e11..d52e741a 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -25,6 +25,12 @@ As root: go get -v -u github.com/FreifunkBremen/yanic ``` +### Allow yanic to send pings +Only needed if the config sets `nodes.ping_count` > 0 +```sh +sudo setcap cap_net_raw=+ep /opt/go/bin/yanic +``` + #### Work with other databases If you like to use another database solution than influxdb, Pull Requests are welcome. Just fork this project and create another subpackage within the folder diff --git a/docs/docs_install.md b/docs/docs_install.md index 388eade3..54a9b565 100644 --- a/docs/docs_install.md +++ b/docs/docs_install.md @@ -26,6 +26,12 @@ As root: go get -v -u github.com/FreifunkBremen/yanic ``` +### Allow yanic to send pings +Only needed if the config sets `nodes.ping_count` > 0 +```sh +sudo setcap cap_net_raw=+ep /opt/go/bin/yanic +``` + ### Install ```sh diff --git a/runtime/node.go b/runtime/node.go index cff23e38..c689fa93 100644 --- a/runtime/node.go +++ b/runtime/node.go @@ -13,7 +13,7 @@ type Node struct { Firstseen jsontime.Time `json:"firstseen"` Lastseen jsontime.Time `json:"lastseen"` Online bool `json:"online"` - NoRespondd bool `json:"-"` + NoRespondd bool `json:"no_respondd"` Statistics *data.Statistics `json:"statistics"` Nodeinfo *data.Nodeinfo `json:"nodeinfo"` Neighbours *data.Neighbours `json:"-"` diff --git a/runtime/nodes.go b/runtime/nodes.go index 8e19d2a6..7e25eb51 100644 --- a/runtime/nodes.go +++ b/runtime/nodes.go @@ -80,9 +80,10 @@ func (nodes *Nodes) Update(nodeID string, res *data.ResponseData) *Node { // Update fields node.Lastseen = now node.Online = true - node.Neighbours = 
res.Neighbours node.Nodeinfo = res.Nodeinfo node.Statistics = res.Statistics + node.Neighbours = res.Neighbours + node.NoRespondd = res.Statistics == nil && res.Neighbours == nil return node } @@ -170,24 +171,32 @@ func (nodes *Nodes) expire() { nodes.Lock() defer nodes.Unlock() + wg := sync.WaitGroup{} + for id, node := range nodes.List { if node.Lastseen.Before(pruneAfter) { // expire delete(nodes.List, id) } else if node.Lastseen.Before(offlineAfter) { // set to offline - if nodes.config.PingCount > 0 && nodes.ping(node) { - node.Online = true - node.NoRespondd = true - - node.Statistics = nil - node.Neighbours = nil - } else { - node.Online = false - node.NoRespondd = false - } + wg.Add(1) + go func(node *Node) { + defer wg.Done() + if nodes.config.PingCount > 0 && nodes.ping(node) { + node.Online = true + node.NoRespondd = true + + node.Statistics = nil + node.Neighbours = nil + } else { + node.Online = false + node.NoRespondd = false + } + }(node) } } + wg.Wait() + log.WithField("nodes", "expire").Debug("end") } // adds the nodes interface addresses to the internal map @@ -249,6 +258,7 @@ func (nodes *Nodes) save() { // serialize nodes SaveJSON(nodes, nodes.config.StatePath) + log.WithField("nodes", "save").Debug("end") } // SaveJSON to path diff --git a/runtime/nodes_ping.go b/runtime/nodes_ping.go index d5f7e535..91981783 100644 --- a/runtime/nodes_ping.go +++ b/runtime/nodes_ping.go @@ -10,13 +10,21 @@ func (nodes *Nodes) ping(node *Node) bool { if node.Nodeinfo != nil { logNode = logNode.WithField("node_id", node.Nodeinfo.NodeID) } - if node.Address == nil { + var addr string + if node.Address != nil { + addr = node.Address.IP.String() + if node.Address.IP.IsLinkLocalUnicast() { + addr += "%" + node.Address.Zone + } + } else { logNode.Debug("error no address found") - return false - } - addr := node.Address.IP.String() - if node.Address.IP.IsLinkLocalUnicast() { - addr += "%" + node.Address.Zone + if node.Nodeinfo != nil { + for _, addrMaybe := range 
node.Nodeinfo.Network.Addresses { + if len(addrMaybe) >= 5 && addrMaybe[:5] != "fe80:" { + addr = addrMaybe + } + } + } } logAddr := logNode.WithField("addr", addr) @@ -26,9 +34,10 @@ func (nodes *Nodes) ping(node *Node) bool { logAddr.Debugf("error during ping: %s", err) return false } - //pinger.SetPrivileged(true) + pinger.SetPrivileged(true) pinger.Count = nodes.config.PingCount pinger.Timeout = nodes.config.PingTimeout.Duration + pinger.Interval = pinger.Timeout / pinger.Count pinger.Run() // blocks until finished stats := pinger.Statistics() logAddr.WithFields(map[string]interface{}{ diff --git a/runtime/nodes_ping_test.go b/runtime/nodes_ping_test.go index 170f4d14..23f43a2f 100644 --- a/runtime/nodes_ping_test.go +++ b/runtime/nodes_ping_test.go @@ -27,7 +27,13 @@ func TestPing(t *testing.T) { ifaceToNodeID: make(map[string]string), } - node := nodes.Update("expire", &data.ResponseData{NodeInfo: &data.NodeInfo{NodeID: "nodeID-Lola"}}) + node := nodes.Update("expire", &data.ResponseData{NodeInfo: &data.NodeInfo{ + NodeID: "nodeID-Lola", + Network: data.Network{Addresses: []string{"fe80::1", "fd2f::1"}}, + }}) + // get fallback + assert.False(nodes.ping(node)) + node.Address = &net.UDPAddr{Zone: "bat0"} // error during ping assert.False(nodes.ping(node)) From 76def18d4543ab3c5036db8fb188e2e0376e206f Mon Sep 17 00:00:00 2001 From: Martin/Geno Date: Sun, 20 Jan 2019 01:28:57 +0100 Subject: [PATCH 3/3] change lib --- runtime/nodes.go | 16 ++++++++++------ runtime/nodes_ping.go | 38 +++++++++++++++----------------------- runtime/nodes_ping_test.go | 2 +- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/runtime/nodes.go b/runtime/nodes.go index 7e25eb51..448c3729 100644 --- a/runtime/nodes.go +++ b/runtime/nodes.go @@ -7,6 +7,7 @@ import ( "time" "github.com/bdlm/log" + ping "github.com/digineo/go-ping" "github.com/FreifunkBremen/yanic/data" "github.com/FreifunkBremen/yanic/lib/jsontime" @@ -17,6 +18,7 @@ type Nodes struct { List map[string]*Node 
`json:"nodes"` // the current nodemap, indexed by node ID ifaceToNodeID map[string]string // mapping from MAC address to NodeID config *NodesConfig + pinger *ping.Pinger sync.RWMutex } @@ -27,6 +29,11 @@ func NewNodes(config *NodesConfig) *Nodes { ifaceToNodeID: make(map[string]string), config: config, } + p, err := ping.New("", "::") + if err != nil { + log.Warnf("ping bind failed: %s", err) + } + nodes.pinger = p if config.StatePath != "" { nodes.load() @@ -83,7 +90,7 @@ func (nodes *Nodes) Update(nodeID string, res *data.ResponseData) *Node { node.Nodeinfo = res.Nodeinfo node.Statistics = res.Statistics node.Neighbours = res.Neighbours - node.NoRespondd = res.Statistics == nil && res.Neighbours == nil + node.NoRespondd = false return node } @@ -185,9 +192,6 @@ func (nodes *Nodes) expire() { if nodes.config.PingCount > 0 && nodes.ping(node) { node.Online = true node.NoRespondd = true - - node.Statistics = nil - node.Neighbours = nil } else { node.Online = false node.NoRespondd = false @@ -196,7 +200,7 @@ func (nodes *Nodes) expire() { } } wg.Wait() - log.WithField("nodes", "expire").Debug("end") + log.WithField("nodes", "expire").Info("end") } // adds the nodes interface addresses to the internal map @@ -258,7 +262,7 @@ func (nodes *Nodes) save() { // serialize nodes SaveJSON(nodes, nodes.config.StatePath) - log.WithField("nodes", "save").Debug("end") + log.WithField("nodes", "save").Info("end") } // SaveJSON to path diff --git a/runtime/nodes_ping.go b/runtime/nodes_ping.go index 91981783..e76aa587 100644 --- a/runtime/nodes_ping.go +++ b/runtime/nodes_ping.go @@ -1,8 +1,9 @@ package runtime import ( + "net" + "github.com/bdlm/log" - "github.com/sparrc/go-ping" ) func (nodes *Nodes) ping(node *Node) bool { @@ -10,38 +11,29 @@ func (nodes *Nodes) ping(node *Node) bool { if node.Nodeinfo != nil { logNode = logNode.WithField("node_id", node.Nodeinfo.NodeID) } - var addr string + var addr *net.IPAddr if node.Address != nil { - addr = node.Address.IP.String() - if 
node.Address.IP.IsLinkLocalUnicast() { - addr += "%" + node.Address.Zone - } + addr = &net.IPAddr{IP:node.Address.IP, Zone: node.Address.Zone} } else { logNode.Debug("error no address found") if node.Nodeinfo != nil { - for _, addrMaybe := range node.Nodeinfo.Network.Addresses { - if len(addrMaybe) >= 5 && addrMaybe[:5] != "fe80:" { - addr = addrMaybe + for _, addrMaybeString := range node.Nodeinfo.Network.Addresses { + if len(addrMaybeString) >= 5 && addrMaybeString[:5] != "fe80:" { + addrMaybe, err := net.ResolveIPAddr("ip6", addrMaybeString) + if err == nil { + addr = addrMaybe + } } } } } - logAddr := logNode.WithField("addr", addr) + logAddr := logNode.WithField("addr", addr.String()) + + _, err := nodes.pinger.PingAttempts(addr, nodes.config.PingTimeout.Duration, nodes.config.PingCount) - pinger, err := ping.NewPinger(addr) - if err != nil { - logAddr.Debugf("error during ping: %s", err) - return false - } - pinger.SetPrivileged(true) - pinger.Count = nodes.config.PingCount - pinger.Timeout = nodes.config.PingTimeout.Duration - pinger.Interval = pinger.Timeout / pinger.Count - pinger.Run() // blocks until finished - stats := pinger.Statistics() logAddr.WithFields(map[string]interface{}{ - "pkg_lost": stats.PacketLoss, + "success": err == nil, }).Debug("pong") - return stats.PacketLoss < 100 + return err == nil } diff --git a/runtime/nodes_ping_test.go b/runtime/nodes_ping_test.go index 23f43a2f..ba212f72 100644 --- a/runtime/nodes_ping_test.go +++ b/runtime/nodes_ping_test.go @@ -27,7 +27,7 @@ func TestPing(t *testing.T) { ifaceToNodeID: make(map[string]string), } - node := nodes.Update("expire", &data.ResponseData{NodeInfo: &data.NodeInfo{ + node := nodes.Update("expire", &data.ResponseData{Nodeinfo: &data.Nodeinfo{ NodeID: "nodeID-Lola", Network: data.Network{Addresses: []string{"fe80::1", "fd2f::1"}}, }})