diff --git a/collector/livestatus/CacheBuilder.go b/collector/livestatus/CacheBuilder.go index 0e12dc2..208b8cb 100644 --- a/collector/livestatus/CacheBuilder.go +++ b/collector/livestatus/CacheBuilder.go @@ -80,7 +80,7 @@ func (builder *CacheBuilder) run(checkInterval time.Duration) { //Builds host/service map which are in downtime func (builder CacheBuilder) createLivestatusCache() Cache { - result := Cache{make(map[string]map[string]string)} + result := Cache{downtime: make(map[string]map[string]string)} downtimeCsv := make(chan []string) finishedDowntime := make(chan bool) hostServiceCsv := make(chan []string) @@ -95,6 +95,10 @@ func (builder CacheBuilder) createLivestatusCache() Cache { for jobsFinished < 2 { select { case downtimesLine := <-downtimeCsv: + if len(downtimesLine) < 3 { + builder.log.Debug("downtimesLine", downtimesLine) + break + } startTime, _ := strconv.Atoi(downtimesLine[1]) entryTime, _ := strconv.Atoi(downtimesLine[2]) latestTime := startTime @@ -118,12 +122,12 @@ func (builder CacheBuilder) createLivestatusCache() Cache { case <-finished: jobsFinished++ case <-time.After(intervalToCheckLivestatusCache / 3): - builder.log.Debug("Livestatus(host/service) timed out") + builder.log.Info("Livestatus timed out...(host/service)") return result } } case <-time.After(intervalToCheckLivestatusCache / 3): - builder.log.Debug("Livestatus(downtimes) timed out") + builder.log.Info("Livestatus timed out...(downtimes)") return result } } diff --git a/collector/livestatus/Collector.go b/collector/livestatus/Collector.go index a7afa56..152f5d6 100644 --- a/collector/livestatus/Collector.go +++ b/collector/livestatus/Collector.go @@ -72,13 +72,13 @@ func NewLivestatusCollector(jobs map[data.Datatype]chan collector.Printable, liv if detectVersion { switch getLivestatusVersion(live) { case Nagios: - live.log.Debug("Livestatus type: Nagios") + live.log.Info("Livestatus type: Nagios") live.logQuery = QueryNagiosForNotifications case Icinga2: - live.log.Debug("Livestatus type: Icinga2") + live.log.Info("Livestatus type: Icinga2") live.logQuery = QueryIcinga2ForNotifications case Naemon: - live.log.Debug("Livestatus type: Naemon") + live.log.Info("Livestatus type: Naemon") live.logQuery = QueryNagiosForNotifications } } @@ -123,8 +123,8 @@ func (live Collector) queryData() { } case <-finished: jobsFinished++ - case <-time.After(intervalToCheckLivestatus / 3): - live.log.Infof("requestPrintablesFromLivestatus timed out. ") + case <-time.After(intervalToCheckLivestatus): + live.log.Warn("Livestatus timed out... (Collector.queryData())") } } } @@ -176,8 +176,8 @@ func (live Collector) requestPrintablesFromLivestatus(query string, addTimestamp default: live.log.Fatal("Found unknown query type" + query) } - case <-finished: - outerFinish <- true + case result := <-finished: + outerFinish <- result return case <-time.After(intervalToCheckLivestatus / 3): live.log.Warn("connectToLivestatus timed out") @@ -218,15 +218,32 @@ func (live Collector) handleQueryForNotifications(line []string) *NotificationDa func getLivestatusVersion(live *Collector) int { printables := make(chan collector.Printable, 1) - live.requestPrintablesFromLivestatus(QueryLivestatusVersion, false, printables, make(chan bool, 1)) + finished := make(chan bool, 1) var version string - select { - case versionPrintable := <-printables: - version = versionPrintable.PrintForInfluxDB("0") - case <-time.After(time.Duration(5) * time.Second): + live.requestPrintablesFromLivestatus(QueryLivestatusVersion, false, printables, finished) + //Wait 3 minutes for livestatus + i := 0 +Loop: + for { + select { + case versionPrintable := <-printables: + version = versionPrintable.PrintForInfluxDB("0") + break Loop + case <-time.After(intervalToCheckLivestatus / 2): + if i < 3 { + go live.requestPrintablesFromLivestatus(QueryLivestatusVersion, false, printables, finished) + } else { + break Loop + } + i++ + case fin := <-finished: + if !fin { + live.log.Info("Could not detect livestatus version, waiting for ", intervalToCheckLivestatus/2, ", three times(", i, ")...") + } + } } - live.log.Debug("Livestatus version: ", version) + live.log.Info("Livestatus version: ", version) if icinga2, _ := regexp.MatchString(`^r[\d\.-]+$`, version); icinga2 { return Icinga2 } else if nagios, _ := regexp.MatchString(`^[\d\.]+p[\d\.]+$`, version); nagios { diff --git a/collector/livestatus/Connector.go b/collector/livestatus/Connector.go index e86d532..f911dd9 100644 --- a/collector/livestatus/Connector.go +++ b/collector/livestatus/Connector.go @@ -31,7 +31,6 @@ func (connector Connector) connectToLivestatus(query string, result chan []strin return } if conn == nil { - connector.Log.Critical("Unable to connect to livestatus: ", connector.LivestatusAddress) outerFinish <- false return } diff --git a/main.go b/main.go index 1a0e6cd..4c5820e 100644 --- a/main.go +++ b/main.go @@ -65,6 +65,7 @@ Commandline Parameter: //Create Logger logging.InitLogger(cfg.Log.LogFile, cfg.Log.MinSeverity) log = logging.GetLogger() + log.Info(`Started Nagflux `, nagfluxVersion) resultQueues := map[data.Datatype]chan collector.Printable{} stoppables := []Stoppable{} if len(cfg.Main.FieldSeparator) < 1 { @@ -135,19 +136,19 @@ loop: for { select { case <-time.After(time.Duration(updateRate) * time.Second): - /*queriesSend, measureTime, err := statisticUser.GetData("send") - if err != nil { - continue - } - idleTime := (measureTime.Seconds() - queriesSend.Time.Seconds() / float64(influx.AmountWorkers())) / updateRate - log.Debugf("Buffer len: %d - Idletime in percent: %0.2f ", len(resultQueues[0]), idleTime * 100) - - //TODO: fix worker spawn by type - if idleTime > 0.25 { - influx.RemoveWorker() - } else if idleTime < 0.1 && float64(len(resultQueues[0])) > resultQueueLength * 0.8 { - influx.AddWorker() - }*/ + /*queriesSend, measureTime, err := statisticUser.GetData("send") + if err != nil { + continue + } + idleTime := (measureTime.Seconds() - queriesSend.Time.Seconds() / float64(influx.AmountWorkers())) / updateRate + log.Debugf("Buffer len: %d - Idletime in percent: %0.2f ", len(resultQueues[0]), idleTime * 100) + + //TODO: fix worker spawn by type + if idleTime > 0.25 { + influx.RemoveWorker() + } else if idleTime < 0.1 && float64(len(resultQueues[0])) > resultQueueLength * 0.8 { + influx.AddWorker() + }*/ case <-quit: break loop }