Skip to content

Commit

Permalink
Importing repo
Browse files Browse the repository at this point in the history
  • Loading branch information
chrusty committed Mar 13, 2015
0 parents commit a0a21f7
Show file tree
Hide file tree
Showing 16 changed files with 834 additions and 0 deletions.
Empty file added DEBIAN/conffiles
Empty file.
10 changes: 10 additions & 0 deletions DEBIAN/control
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Package: ctop
Version: 1.3
Architecture: amd64
Essential: no
Section: Applications
Priority: optional
Depends: libmx4j-java
Maintainer: devops@hailocab.com
Installed-Size: 1000
Description: C-TOP (Top for Cassandra)
16 changes: 16 additions & 0 deletions DEBIAN/md5sums
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
9f2d62b9104016299653a3d0a26907d8 usr/local/bin/zoo-disencrufter
1775f4b499fc40fb418525318a16aa1d usr/local/bin/zoo-encrufter
b98d7fbf3624e3b81a8f551bc55a5647 usr/local/bin/zoo-nodegetter
e033957321c3cd9fd716177d453b3c89 usr/local/bin/zoo-watcher
07a8e2ec94060cd375c9736699b79870 usr/local/bin/zoo-locker
6438a6dceb527a55ccc2c00730a92f2a usr/share/zoo-disencrufter/zoo-disencrufter.cfg-HAILO-STAGING-GLOBAL02
31524844d87f1bf308562fd83e971ec2 usr/share/zoo-disencrufter/zoo-disencrufter.cfg-HAILO-TEST-GLOBAL01
c3bedb047347f2d5038923edec6ccd5e usr/share/zoo-disencrufter/zoo-disencrufter.cfg-CRUFT
a1b9a590b193003547e0697fa49e3998 usr/share/zoo-disencrufter/zoo-disencrufter.cfg-HAILO-STAGING-GLOBAL01
04ef4a86253fedbb20cc53a8a45833bc usr/share/zoo-disencrufter/zoo-disencrufter.cfg-HAILO-LIVE-GLOBAL01
7d265d9c39c016f19d5dd88e572a7133 usr/share/zoo-disencrufter/zoo-disencrufter.cfg-HAILO-TEST-GLOBAL02
a5d643862153f2b3cc3dcb03801ab3a1 usr/share/zoo-disencrufter/zoo-disencrufter.cfg-HAILO-LIVE-GLOBAL02
670f666e4b7f37d771ce204e5c432aef etc/.DS_Store
68c5df9c57c6f5db62801d64a7b48a77 etc/cron.d/zoo-disencrufter
a057902a60540c7532bf72ced7912f97 etc/cron.d/ubuntu-zookeeper
1251a0f35df20303261799102a0635ef etc/zoo-disencrufter.cfg
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2015 Hailo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# CTOP ("Top for Cassandra")

CTOP is a tool which allows you to quickly find out what's happening on a machine running Cassandra. It is particularly useful on a cluster with multiple tenants, multiple applications, and large numbers of tables. If you suspect that performance is poor, you can use it to figure out which table is giving you trouble.

## Screenshots:
![Ordered by read-latency](docs/ordered-by-read-latency.png "Ordered by read-latency")
![Ordered by disk-space](docs/ordered-by-diskspace.png "Ordered by disk-space")
![Ordered by write-rate](docs/ordered-by-write-rate.png "Ordered by write-rate")

## Installation:
* Download a [DEB file](https://github.com/hailocab/ctop/releases/download/1.3/ctop_1.3_amd64.deb "CTOP 1.3") from the [releases](https://github.com/hailocab/ctop/releases "releases") section
* Install mx4j "apt-get install libmx4j-java"
* Install CTOP "dpkg -i ctop_1.3_amd64.deb"
* Find out what's killing your Cassandra cluster

## How to use it:
* Run the binary
* Allow CTOP some time to collect metrics, then press SPACE to refresh the display. You will see your tables listed by Reads/s (in descending order).
* You can press SPACE at any time to refresh the display
* The numbers 1 through 5 change the sorting order:
1: Order by Reads/s
2: Order by Writes/s
3: Order by the amount of disk-space used (in Bytes)
4: Order by read-latency (in milliseconds)
5: Order by write-latency (in milliseconds)
* Pressing "Q" will quit
* Metrics are for one node only (not cluster-wide)

## Notes
* CTOP should run on anything, but I've only tested it on Ubuntu.

## Pre-requisites:
* CTOP uses "libmx4j-java" (an HTTP -> JMX bridge) to retrieve JMX metrics from Cassandra (there was no easy way to query JMX directly), so this needs to be on the class-path where Cassandra can find it upon startup (on Ubuntu it is enough to run "apt-get install libmx4j-java" and then restart Cassandra).
* Cassandra needs to be told how to run MX4J (the default is to listen to the interface that Cassandra listens on):
```
MX4J_ADDRESS="-Dmx4jaddress=127.0.0.1"
MX4J_PORT="-Dmx4jport=8081"
JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS"
JVM_OPTS="$JVM_OPTS $MX4J_PORT"
```
158 changes: 158 additions & 0 deletions display.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package main

import (
// "flag"
"fmt"
"github.com/nsf/termbox-go"
"time"
// "os"
)

// show_logs clears the screen and draws the pending log-messages, one per row.
//
// Messages are received from messageChannel without blocking, so every message
// that gets drawn is also consumed from the channel. Drawing stops either when
// the channel has nothing more to offer (a "No more logs" marker is printed on
// the next free row) or when the rows below termHeight have all been used.
func show_logs() {
	termbox.Clear(termbox.ColorDefault, termbox.ColorDefault)

	// Header row. The separator is drawn in its own column between the two
	// titles; drawing it at the same column as "Severity" would let the title
	// overwrite it.
	printf_tb(2, 1, messageForeGroundColour|termbox.AttrBold, termbox.ColorBlack, "Severity")
	printf_tb(11, 1, messageForeGroundColour, termbox.ColorBlack, "|")
	printf_tb(13, 1, messageForeGroundColour|termbox.AttrBold, termbox.ColorBlack, "Message")

	for y := 2; y < termHeight; y++ {
		select {
		// Attempt to receive from the channel without blocking:
		case logMessage := <-messageChannel:
			printf_tb(2, y, messageForeGroundColour, termbox.ColorBlack, "%s", logMessage.Severity)
			printf_tb(13, y, messageForeGroundColour, termbox.ColorBlack, "%s", logMessage.Message)
		default:
			// Nothing left to read right now.
			printf_tb(2, y, messageForeGroundColour, termbox.ColorBlack, "No more logs")
			return
		}
	}
}

// show_stats clears the screen and draws one row per entry of the stats map, in
// the order returned by sortedKeys, until the rows below termHeight are used up.
//
// statsMutex is held for the whole time the map is being read. Releasing it
// before the rows are drawn would let a concurrent writer modify the map while
// it is being read here.
func show_stats() {
	termbox.Clear(termbox.ColorDefault, termbox.ColorDefault)

	// Column titles, each drawn at its own position so that the layout does not
	// depend on padding inside one long header string.
	// Positions: 2, 22, 42, 52, 63, 79, 94
	headerColour := messageForeGroundColour | termbox.AttrBold
	printf_tb(2, 1, headerColour, termbox.ColorBlack, "KeySpace")
	printf_tb(22, 1, headerColour, termbox.ColorBlack, "ColumnFamily")
	printf_tb(42, 1, headerColour, termbox.ColorBlack, "Reads/s")
	printf_tb(52, 1, headerColour, termbox.ColorBlack, "Writes/s")
	printf_tb(63, 1, headerColour, termbox.ColorBlack, "LiveSpace(B)")
	printf_tb(79, 1, headerColour, termbox.ColorBlack, "R-Latency(ms)")
	printf_tb(94, 1, headerColour, termbox.ColorBlack, "W-Latency(ms)")

	// Column separators:
	for _, x := range []int{20, 40, 50, 61, 76, 92} {
		printf_tb(x, 1, messageForeGroundColour, termbox.ColorBlack, "|")
	}

	// Get a lock on stats and keep it until every row has been drawn:
	statsMutex.Lock()
	defer statsMutex.Unlock()

	y := 2
	for _, cfStatsKey := range sortedKeys(stats) {
		// Stop as soon as the screen is full.
		if y >= termHeight {
			break
		}

		// Look the entry up once per row.
		row := stats[cfStatsKey]

		// Each value is printed from its column's separator position and padded
		// so that the text starts under the column title; the padding also
		// blanks anything that overflowed from the previous column.
		printf_tb(2, y, messageForeGroundColour, termbox.ColorBlack, "%s", row.KeySpace)
		printf_tb(20, y, messageForeGroundColour, termbox.ColorBlack, "  %s", row.ColumnFamily)
		printf_tb(40, y, messageForeGroundColour, termbox.ColorBlack, "  %f", row.ReadRate)
		printf_tb(50, y, messageForeGroundColour, termbox.ColorBlack, "  %f", row.WriteRate)
		printf_tb(61, y, messageForeGroundColour, termbox.ColorBlack, "  %d", row.LiveDiskSpaceUsed)
		printf_tb(76, y, messageForeGroundColour, termbox.ColorBlack, "   %f", row.ReadLatency)
		printf_tb(92, y, messageForeGroundColour, termbox.ColorBlack, "  %f", row.WriteLatency)
		y++
	}
}

// Refreshes the on-screen data:
func refresh_screen() {
for {

if dataDisplayed == "Metrics" {
show_stats()
}

if dataDisplayed == "Logs" {
show_logs()
}

// Sleep:
time.Sleep(refreshTime)
}
}

// print_tb writes msg on row y, one rune per cell, starting at column x and
// using the given foreground and background attributes for every cell.
func print_tb(x, y int, fg, bg termbox.Attribute, msg string) {
	col := x
	for _, r := range msg {
		termbox.SetCell(col, y, r, fg, bg)
		col++
	}
}

// printf_tb formats its arguments with fmt.Sprintf and hands the resulting
// string to print_tb, which draws it at (x, y) with the given attributes.
func printf_tb(x, y int, fg, bg termbox.Attribute, format string, args ...interface{}) {
	print_tb(x, y, fg, bg, fmt.Sprintf(format, args...))
}

// draw_border draws the frame around a screen of the given width and height:
// the border lines and corners, the title on the top edge, the key menu on the
// bottom edge (with the active sort mode highlighted according to dataSortedBy)
// and, in the top-right corner, the name of the view selected by dataDisplayed.
//
// Everything is positioned from the width and height arguments, so the frame
// stays consistent with the size the caller asked for.
func draw_border(width int, height int) {
	// Top and bottom edges:
	for x := 0; x < width; x++ {
		termbox.SetCell(x, 0, '-', defaultForeGroundColour, defaultBackGroundColour)
		termbox.SetCell(x, height-1, '-', defaultForeGroundColour, defaultBackGroundColour)
	}

	// Left and right sides:
	for y := 0; y < height; y++ {
		termbox.SetCell(0, y, '|', defaultForeGroundColour, defaultBackGroundColour)
		termbox.SetCell(width-1, y, '|', defaultForeGroundColour, defaultBackGroundColour)
	}

	// Corners:
	termbox.SetCell(0, 0, '+', defaultForeGroundColour, defaultBackGroundColour)
	termbox.SetCell(width-1, 0, '+', defaultForeGroundColour, defaultBackGroundColour)
	termbox.SetCell(0, height-1, '+', defaultForeGroundColour, defaultBackGroundColour)
	termbox.SetCell(width-1, height-1, '+', defaultForeGroundColour, defaultBackGroundColour)

	// Title:
	print_tb(1, 0, termbox.ColorBlue|termbox.AttrBold, defaultBackGroundColour, " C-top ")
	print_tb(8, 0, termbox.ColorBlue, defaultBackGroundColour, "(top for Cassandra) ")

	// Menu:
	print_tb(1, height-1, termbox.ColorBlue, defaultBackGroundColour, " Organise by (1)Reads/s / (2)Writes/s / (3)Space-used / (4)Read-latency / (5)Write-latency, (M)etrics, (L)ogs, (Q)uit ")

	// The hot-key of every menu entry is redrawn in bold on top of the menu
	// text. sortBy names the dataSortedBy value that the key selects; it is
	// empty for keys that do not select a sort mode.
	hotKeys := []struct {
		x      int
		label  string
		sortBy string
	}{
		{15, "1", "Reads"},
		{28, "2", "Writes"},
		{42, "3", "Space"},
		{58, "4", "ReadLatency"},
		{76, "5", "WriteLatency"},
		{94, "M", ""},
		{105, "L", ""},
		{113, "Q", ""},
	}
	for _, key := range hotKeys {
		colour := termbox.ColorBlue | termbox.AttrBold
		// Highlight the active sorting mode:
		if key.sortBy != "" && key.sortBy == dataSortedBy {
			colour = termbox.ColorWhite | termbox.AttrBold
		}
		print_tb(key.x, height-1, colour, defaultBackGroundColour, key.label)
	}

	// Show what mode we're in (right-aligned against the border, using the
	// width we were given and not a global):
	if dataDisplayed == "Metrics" {
		printf_tb(width-10, 0, termbox.ColorBlue|termbox.AttrBold, termbox.ColorBlack, " Metrics ")
	}
	if dataDisplayed == "Logs" {
		printf_tb(width-7, 0, termbox.ColorBlue|termbox.AttrBold, termbox.ColorBlack, " Logs ")
	}
}
Binary file added docs/ordered-by-diskspace.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/ordered-by-read-latency.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/ordered-by-write-rate.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
121 changes: 121 additions & 0 deletions handlers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package main

import (
types "./types"
// "flag"
"fmt"
"github.com/nsf/termbox-go"
// "time"
// "os"
)

// log_to_channel wraps severity and message in a types.LogMessage and offers it
// to messageChannel. The send never blocks: if the channel cannot take the
// message right now, the message is silently dropped.
func log_to_channel(severity string, message string) {
	entry := types.LogMessage{Severity: severity, Message: message}

	select {
	case messageChannel <- entry:
		// Delivered.
	default:
		// The channel is not ready; drop the message so the caller is not blocked.
	}
}

// Takes metrics off the channel and adds them up:
func handle_metrics() {

var cfStats types.CFStats

for {
// Get a metric from the channel:
cfMetric := <-metricsChannel
log_to_channel("debug", fmt.Sprintf("Received a metric! %s", cfMetric.MetricName))

// Build the key:
statName := cfMetric.KeySpace + ":" + cfMetric.ColumnFamily

statsMutex.Lock()
defer statsMutex.Unlock()

// See if we already have a stats-entry:
if _, ok := stats[statName]; ok {
// Use the existing stats-entry:
log_to_channel("debug", fmt.Sprintf("Updating existing stat (%s)", statName))
cfStats = stats[statName]
} else {
// Add a new entry to the map:
log_to_channel("debug", fmt.Sprintf("Adding new stat (%s)", statName))
cfStats = types.CFStats{
ReadCount: 0,
ReadCountTS: 0,
ReadLatency: 0.0,
ReadRate: 0.0,
WriteCount: 0,
WriteCountTS: 0,
WriteLatency: 0.0,
WriteRate: 0.0,
KeySpace: cfMetric.KeySpace,
ColumnFamily: cfMetric.ColumnFamily,
}
}

// Figure out which metric we need to update:
if cfMetric.MetricName == "ReadCount" {
// Total read count:
interval := cfMetric.MetricTimeStamp - cfStats.ReadCountTS
if cfStats.ReadCountTS == 0 {
cfStats.ReadRate = 0.0
} else {
cfStats.ReadRate = float64(cfMetric.MetricIntValue-cfStats.ReadCount) / float64(interval)
}
cfStats.ReadCount = cfMetric.MetricIntValue
cfStats.ReadCountTS = cfMetric.MetricTimeStamp
stats[statName] = cfStats

} else if cfMetric.MetricName == "WriteCount" {
// Total write count:
interval := cfMetric.MetricTimeStamp - cfStats.WriteCountTS
if cfStats.WriteCountTS == 0 {
cfStats.WriteRate = 0.0
} else {
cfStats.WriteRate = float64(cfMetric.MetricIntValue-cfStats.WriteCount) / float64(interval)
}
cfStats.WriteCount = cfMetric.MetricIntValue
cfStats.WriteCountTS = cfMetric.MetricTimeStamp
stats[statName] = cfStats

} else if cfMetric.MetricName == "LiveDiskSpaceUsed" {
// Total disk space used(k):
cfStats.LiveDiskSpaceUsed = cfMetric.MetricIntValue
stats[statName] = cfStats

} else if cfMetric.MetricName == "RecentReadLatencyMicros" {
// ReadLatency (MicroSeconds):
if cfMetric.MetricFloatValue > 0 {
cfStats.ReadLatency = cfMetric.MetricFloatValue / 1000
stats[statName] = cfStats
}

} else if cfMetric.MetricName == "RecentWriteLatencyMicros" {
// WriteLatency (MicroSeconds):
if cfMetric.MetricFloatValue > 0 {
cfStats.WriteLatency = cfMetric.MetricFloatValue / 1000
stats[statName] = cfStats
}
}

statsMutex.Unlock()

}

}

// handle_keypress records the character carried by a key-press event on the log
// channel at "debug" severity. It does not return anything.
func handle_keypress(ev *termbox.Event) {
	// ev.Ch is a rune, so it is formatted with %q; %s would render it as
	// "%!s(int32=...)" and not as the character that was pressed.
	log_to_channel("debug", fmt.Sprintf("Key pressed: %q", ev.Ch))
}
Loading

0 comments on commit a0a21f7

Please sign in to comment.