Skip to content

Commit

Permalink
reworked some logging and fixed an index error
Browse files Browse the repository at this point in the history
  • Loading branch information
Griesbacher committed Nov 16, 2016
1 parent 9f9495a commit 46fc112
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 30 deletions.
10 changes: 7 additions & 3 deletions collector/livestatus/CacheBuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (builder *CacheBuilder) run(checkInterval time.Duration) {

//Builds a map of the hosts/services which are currently in downtime
func (builder CacheBuilder) createLivestatusCache() Cache {
result := Cache{make(map[string]map[string]string)}
result := Cache{downtime: make(map[string]map[string]string)}
downtimeCsv := make(chan []string)
finishedDowntime := make(chan bool)
hostServiceCsv := make(chan []string)
Expand All @@ -95,6 +95,10 @@ func (builder CacheBuilder) createLivestatusCache() Cache {
for jobsFinished < 2 {
select {
case downtimesLine := <-downtimeCsv:
if len(downtimesLine) < 3 {
builder.log.Debug("downtimesLine", downtimesLine)
break
}
startTime, _ := strconv.Atoi(downtimesLine[1])
entryTime, _ := strconv.Atoi(downtimesLine[2])
latestTime := startTime
Expand All @@ -118,12 +122,12 @@ func (builder CacheBuilder) createLivestatusCache() Cache {
case <-finished:
jobsFinished++
case <-time.After(intervalToCheckLivestatusCache / 3):
builder.log.Debug("Livestatus(host/service) timed out")
builder.log.Info("Livestatus timed out...(host/service)")
return result
}
}
case <-time.After(intervalToCheckLivestatusCache / 3):
builder.log.Debug("Livestatus(downtimes) timed out")
builder.log.Info("Livestatus timed out...(downtimes)")
return result
}
}
Expand Down
43 changes: 30 additions & 13 deletions collector/livestatus/Collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@ func NewLivestatusCollector(jobs map[data.Datatype]chan collector.Printable, liv
if detectVersion {
switch getLivestatusVersion(live) {
case Nagios:
live.log.Debug("Livestatus type: Nagios")
live.log.Info("Livestatus type: Nagios")
live.logQuery = QueryNagiosForNotifications
case Icinga2:
live.log.Debug("Livestatus type: Icinga2")
live.log.Info("Livestatus type: Icinga2")
live.logQuery = QueryIcinga2ForNotifications
case Naemon:
live.log.Debug("Livestatus type: Naemon")
live.log.Info("Livestatus type: Naemon")
live.logQuery = QueryNagiosForNotifications
}
}
Expand Down Expand Up @@ -123,8 +123,8 @@ func (live Collector) queryData() {
}
case <-finished:
jobsFinished++
case <-time.After(intervalToCheckLivestatus / 3):
live.log.Infof("requestPrintablesFromLivestatus timed out. ")
case <-time.After(intervalToCheckLivestatus):
live.log.Warn("Livestatus timed out... (Collector.queryData())")
}
}
}
Expand Down Expand Up @@ -176,8 +176,8 @@ func (live Collector) requestPrintablesFromLivestatus(query string, addTimestamp
default:
live.log.Fatal("Found unknown query type" + query)
}
case <-finished:
outerFinish <- true
case result := <-finished:
outerFinish <- result
return
case <-time.After(intervalToCheckLivestatus / 3):
live.log.Warn("connectToLivestatus timed out")
Expand Down Expand Up @@ -218,15 +218,32 @@ func (live Collector) handleQueryForNotifications(line []string) *NotificationDa

func getLivestatusVersion(live *Collector) int {
printables := make(chan collector.Printable, 1)
live.requestPrintablesFromLivestatus(QueryLivestatusVersion, false, printables, make(chan bool, 1))
finished := make(chan bool, 1)
var version string
select {
case versionPrintable := <-printables:
version = versionPrintable.PrintForInfluxDB("0")
case <-time.After(time.Duration(5) * time.Second):
live.requestPrintablesFromLivestatus(QueryLivestatusVersion, false, printables, finished)
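//Wait for livestatus (3 minutes in total): on each timeout of intervalToCheckLivestatus / 2 the version query is sent again, at most three times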
i := 0
Loop:
for {
select {
case versionPrintable := <-printables:
version = versionPrintable.PrintForInfluxDB("0")
break Loop
case <-time.After(intervalToCheckLivestatus / 2):
if i < 3 {
go live.requestPrintablesFromLivestatus(QueryLivestatusVersion, false, printables, finished)
} else {
break Loop
}
i++
case fin := <-finished:
if !fin {
live.log.Info("Could not detect livestatus version, waiting for ", intervalToCheckLivestatus/2, ", three times(", i, ")...")
}
}
}

live.log.Debug("Livestatus version: ", version)
live.log.Info("Livestatus version: ", version)
if icinga2, _ := regexp.MatchString(`^r[\d\.-]+$`, version); icinga2 {
return Icinga2
} else if nagios, _ := regexp.MatchString(`^[\d\.]+p[\d\.]+$`, version); nagios {
Expand Down
1 change: 0 additions & 1 deletion collector/livestatus/Connector.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ func (connector Connector) connectToLivestatus(query string, result chan []strin
return
}
if conn == nil {
connector.Log.Critical("Unable to connect to livestatus: ", connector.LivestatusAddress)
outerFinish <- false
return
}
Expand Down
27 changes: 14 additions & 13 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ Commandline Parameter:
//Create Logger
logging.InitLogger(cfg.Log.LogFile, cfg.Log.MinSeverity)
log = logging.GetLogger()
log.Info(`Started Nagflux `, nagfluxVersion)
resultQueues := map[data.Datatype]chan collector.Printable{}
stoppables := []Stoppable{}
if len(cfg.Main.FieldSeparator) < 1 {
Expand Down Expand Up @@ -135,19 +136,19 @@ loop:
for {
select {
case <-time.After(time.Duration(updateRate) * time.Second):
/*queriesSend, measureTime, err := statisticUser.GetData("send")
if err != nil {
continue
}
idleTime := (measureTime.Seconds() - queriesSend.Time.Seconds() / float64(influx.AmountWorkers())) / updateRate
log.Debugf("Buffer len: %d - Idletime in percent: %0.2f ", len(resultQueues[0]), idleTime * 100)
//TODO: fix worker spawn by type
if idleTime > 0.25 {
influx.RemoveWorker()
} else if idleTime < 0.1 && float64(len(resultQueues[0])) > resultQueueLength * 0.8 {
influx.AddWorker()
}*/
/*queriesSend, measureTime, err := statisticUser.GetData("send")
if err != nil {
continue
}
idleTime := (measureTime.Seconds() - queriesSend.Time.Seconds() / float64(influx.AmountWorkers())) / updateRate
log.Debugf("Buffer len: %d - Idletime in percent: %0.2f ", len(resultQueues[0]), idleTime * 100)
//TODO: fix worker spawn by type
if idleTime > 0.25 {
influx.RemoveWorker()
} else if idleTime < 0.1 && float64(len(resultQueues[0])) > resultQueueLength * 0.8 {
influx.AddWorker()
}*/
case <-quit:
break loop
}
Expand Down

0 comments on commit 46fc112

Please sign in to comment.