From 62276d310f548d1de4f688dcfd50191ed78b142a Mon Sep 17 00:00:00 2001 From: Enrico Stahn Date: Sun, 25 Feb 2018 00:36:13 +1100 Subject: [PATCH] * Add ability to calculate correct idle/active/total processes #6 * Expose PHP-FPM Pool metrics #4 --- Gopkg.lock | 8 ++++- Gopkg.toml | 4 +++ cmd/server.go | 9 +++++ phpfpm/exporter.go | 89 +++++++++++++++++++++++++++++++++++++--------- phpfpm/phpfpm.go | 64 +++++++++++++++++++++++---------- 5 files changed, 137 insertions(+), 37 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index b09a2ad..44c29a3 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -135,6 +135,12 @@ packages = ["."] revision = "8c0189d9f6bbf301e5d055d34268156b317016af" +[[projects]] + name = "github.com/speps/go-hashids" + packages = ["."] + revision = "d1d57a886aa7e3ef6092b70ceab077e35ee8e0ce" + version = "v1.0.0" + [[projects]] name = "github.com/spf13/afero" packages = [ @@ -211,6 +217,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "9d6c96a2ed2824b362a4f9d68ba313678fb7e3f7e38e2fd7058ba9c21a0b2487" + inputs-digest = "679d8f3e08c6fb8c5ef8eb95cedf7aec17654ae00051c78638c88c3731b1dc43" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index 93700d5..3a19740 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -48,3 +48,7 @@ [prune] go-tests = true unused-packages = true + +[[constraint]] + name = "github.com/speps/go-hashids" + version = "1.0.0" diff --git a/cmd/server.go b/cmd/server.go index 59b80ed..3e15f38 100644 --- a/cmd/server.go +++ b/cmd/server.go @@ -31,6 +31,7 @@ var ( listeningAddress string metricsEndpoint string scrapeURIs []string + fixProcessCount bool ) // serverCmd represents the server command @@ -53,6 +54,12 @@ to quickly create a Cobra application.`, } exporter := phpfpm.NewExporter(pm) + + if fixProcessCount { + log.Info("Idle/Active/Total Processes will be calculated by php-fpm_exporter.") + exporter.CalculateProcessScoreboard = true + } + prometheus.MustRegister(exporter) srv := &http.Server{ @@ -118,6 +125,7 @@ func init() { serverCmd.Flags().StringVar(&listeningAddress, "web.listen-address", ":9253", "Address on which to expose metrics and web interface.") serverCmd.Flags().StringVar(&metricsEndpoint, "web.telemetry-path", "/metrics", "Path under which to expose metrics.") serverCmd.Flags().StringSliceVar(&scrapeURIs, "phpfpm.scrape-uri", []string{"tcp://127.0.0.1:9000/status"}, "FastCGI address, e.g. unix:///tmp/php.sock;/status or tcp://127.0.0.1:9000/status") + serverCmd.Flags().BoolVar(&fixProcessCount, "phpfpm.fix-process-count", false, "Enable to calculate process numbers via php-fpm_exporter since PHP-FPM sporadically reports wrong active/idle/total process numbers.") //viper.BindEnv("web.listen-address", "PHP_FPM_WEB_LISTEN_ADDRESS") //viper.BindPFlag("web.listen-address", serverCmd.Flags().Lookup("web.listen-address")) @@ -128,6 +136,7 @@ func init() { "PHP_FPM_WEB_LISTEN_ADDRESS": "web.listen-address", "PHP_FPM_WEB_TELEMETRY_PATH": "web.telemetry-path", "PHP_FPM_SCRAPE_URI": "phpfpm.scrape-uri", + "PHP_FPM_FIX_PROCESS_COUNT": "phpfpm.fix-process-count", } for env, flag := range envs { diff --git a/phpfpm/exporter.go b/phpfpm/exporter.go index 86343a0..b4abbf6 100644 --- a/phpfpm/exporter.go +++ b/phpfpm/exporter.go @@ -15,6 +15,7 @@ package phpfpm import ( "github.com/prometheus/client_golang/prometheus" + "github.com/speps/go-hashids" "sync" ) @@ -24,22 +25,27 @@ const ( // Exporter configures and exposes PHP-FPM metrics to Prometheus. type Exporter struct { - PoolManager PoolManager mutex sync.Mutex + PoolManager PoolManager - up *prometheus.Desc - scrapeFailues *prometheus.Desc - startSince *prometheus.Desc - acceptedConnections *prometheus.Desc - listenQueue *prometheus.Desc - maxListenQueue *prometheus.Desc - listenQueueLength *prometheus.Desc - idleProcesses *prometheus.Desc - activeProcesses *prometheus.Desc - totalProcesses *prometheus.Desc - maxActiveProcesses *prometheus.Desc - maxChildrenReached *prometheus.Desc - slowRequests *prometheus.Desc + CalculateProcessScoreboard bool + + up *prometheus.Desc + scrapeFailues *prometheus.Desc + startSince *prometheus.Desc + acceptedConnections *prometheus.Desc + listenQueue *prometheus.Desc + maxListenQueue *prometheus.Desc + listenQueueLength *prometheus.Desc + idleProcesses *prometheus.Desc + activeProcesses *prometheus.Desc + totalProcesses *prometheus.Desc + maxActiveProcesses *prometheus.Desc + maxChildrenReached *prometheus.Desc + slowRequests *prometheus.Desc + processRequests *prometheus.Desc + processLastRequestMemory *prometheus.Desc + processLastRequestCPU *prometheus.Desc } // NewExporter creates a new Exporter for a PoolManager and configures the necessary metrics. @@ -47,6 +53,8 @@ func NewExporter(pm PoolManager) *Exporter { return &Exporter{ PoolManager: pm, + CalculateProcessScoreboard: false, + up: prometheus.NewDesc( prometheus.BuildFQName(namespace, "", "up"), "Could PHP-FPM be reached?", @@ -124,6 +132,24 @@ func NewExporter(pm PoolManager) *Exporter { "The number of requests that exceeded your 'request_slowlog_timeout' value.", []string{"pool"}, nil), + + processRequests: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "process_requests"), + "", + []string{"pool", "pid"}, + nil), + + processLastRequestMemory: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "process_last_request_memory"), + "", + []string{"pool", "pid"}, + nil), + + processLastRequestCPU: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "", "process_last_request_cpu"), + "", + []string{"pool", "pid"}, + nil), } } @@ -143,18 +169,36 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { continue } + active, idle, total := CalculateProcessScoreboard(pool) + if active != pool.ActiveProcesses || idle != pool.IdleProcesses { + log.Error("Inconsistent active and idle processes reported. Set `--fix-process-count` to have this calculated by php-fpm_exporter instead.") + } + + if e.CalculateProcessScoreboard == false { + active = pool.ActiveProcesses + idle = pool.IdleProcesses + total = pool.TotalProcesses + } + ch <- prometheus.MustNewConstMetric(e.up, prometheus.GaugeValue, 1, pool.Name) ch <- prometheus.MustNewConstMetric(e.startSince, prometheus.CounterValue, float64(pool.AcceptedConnections), pool.Name) ch <- prometheus.MustNewConstMetric(e.acceptedConnections, prometheus.CounterValue, float64(pool.StartSince), pool.Name) ch <- prometheus.MustNewConstMetric(e.listenQueue, prometheus.GaugeValue, float64(pool.ListenQueue), pool.Name) ch <- prometheus.MustNewConstMetric(e.maxListenQueue, prometheus.CounterValue, float64(pool.MaxListenQueue), pool.Name) ch <- prometheus.MustNewConstMetric(e.listenQueueLength, prometheus.GaugeValue, float64(pool.ListenQueueLength), pool.Name) - ch <- prometheus.MustNewConstMetric(e.idleProcesses, prometheus.GaugeValue, float64(pool.IdleProcesses), pool.Name) - ch <- prometheus.MustNewConstMetric(e.activeProcesses, prometheus.GaugeValue, float64(pool.ActiveProcesses), pool.Name) - ch <- prometheus.MustNewConstMetric(e.totalProcesses, prometheus.GaugeValue, float64(pool.TotalProcesses), pool.Name) + ch <- prometheus.MustNewConstMetric(e.idleProcesses, prometheus.GaugeValue, float64(idle), pool.Name) + ch <- prometheus.MustNewConstMetric(e.activeProcesses, prometheus.GaugeValue, float64(active), pool.Name) + ch <- prometheus.MustNewConstMetric(e.totalProcesses, prometheus.GaugeValue, float64(total), pool.Name) ch <- prometheus.MustNewConstMetric(e.maxActiveProcesses, prometheus.CounterValue, float64(pool.MaxActiveProcesses), pool.Name) ch <- prometheus.MustNewConstMetric(e.maxChildrenReached, prometheus.CounterValue, float64(pool.MaxChildrenReached), pool.Name) ch <- prometheus.MustNewConstMetric(e.slowRequests, prometheus.CounterValue, float64(pool.SlowRequests), pool.Name) + + for _, process := range pool.Processes { + pid := calculateProcessHash(process) + ch <- prometheus.MustNewConstMetric(e.processRequests, prometheus.CounterValue, float64(process.Requests), pool.Name, pid) + ch <- prometheus.MustNewConstMetric(e.processLastRequestMemory, prometheus.GaugeValue, float64(process.LastRequestMemory), pool.Name, pid) + ch <- prometheus.MustNewConstMetric(e.processLastRequestCPU, prometheus.GaugeValue, float64(process.LastRequestCPU), pool.Name, pid) + } } return @@ -174,3 +218,14 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { ch <- e.maxChildrenReached ch <- e.slowRequests } + +// calculateProcessHash generates a unique identifier for a process to ensure uniqueness across multiple systems/containers +func calculateProcessHash(pp PoolProcess) string { + hd := hashids.NewData() + hd.Salt = "php-fpm_exporter" + hd.MinLength = 12 + h := hashids.NewWithData(hd) + e, _ := h.Encode([]int{int(pp.StartTime), int(pp.PID)}) + + return e +} diff --git a/phpfpm/phpfpm.go b/phpfpm/phpfpm.go index a48d3f1..00b46a8 100644 --- a/phpfpm/phpfpm.go +++ b/phpfpm/phpfpm.go @@ -25,11 +25,18 @@ import ( "time" ) +const PoolProcessRequestIdle string = "Idle" +const PoolProcessRequestActive string = "Running" + var log logger type logger interface { + Info(ar ...interface{}) + Infof(string, ...interface{}) + Debug(ar ...interface{}) Debugf(string, ...interface{}) Error(ar ...interface{}) + Errorf(string, ...interface{}) } // PoolManager manages all configured Pools @@ -46,34 +53,34 @@ type Pool struct { Name string `json:"pool"` ProcessManager string `json:"process manager"` StartTime timestamp `json:"start time"` - StartSince int `json:"start since"` - AcceptedConnections int `json:"accepted conn"` - ListenQueue int `json:"listen queue"` - MaxListenQueue int `json:"max listen queue"` - ListenQueueLength int `json:"listen queue len"` - IdleProcesses int `json:"idle processes"` - ActiveProcesses int `json:"active processes"` - TotalProcesses int `json:"total processes"` - MaxActiveProcesses int `json:"max active processes"` - MaxChildrenReached int `json:"max children reached"` - SlowRequests int `json:"slow requests"` + StartSince int64 `json:"start since"` + AcceptedConnections int64 `json:"accepted conn"` + ListenQueue int64 `json:"listen queue"` + MaxListenQueue int64 `json:"max listen queue"` + ListenQueueLength int64 `json:"listen queue len"` + IdleProcesses int64 `json:"idle processes"` + ActiveProcesses int64 `json:"active processes"` + TotalProcesses int64 `json:"total processes"` + MaxActiveProcesses int64 `json:"max active processes"` + MaxChildrenReached int64 `json:"max children reached"` + SlowRequests int64 `json:"slow requests"` Processes []PoolProcess `json:"processes"` } // PoolProcess describes a single PHP-FPM process. A pool can have multiple processes. type PoolProcess struct { - PID int `json:"pid"` + PID int64 `json:"pid"` State string `json:"state"` - StartTime int `json:"start time"` - StartSince int `json:"start since"` - Requests int `json:"requests"` - RequestDuration int `json:"request duration"` + StartTime int64 `json:"start time"` + StartSince int64 `json:"start since"` + Requests int64 `json:"requests"` + RequestDuration int64 `json:"request duration"` RequestMethod string `json:"request method"` RequestURI string `json:"request uri"` - ContentLength int `json:"content length"` + ContentLength int64 `json:"content length"` User string `json:"user"` Script string `json:"script"` - LastRequestCPU float32 `json:"last request cpu"` + LastRequestCPU float64 `json:"last request cpu"` LastRequestMemory int `json:"last request memory"` } @@ -143,7 +150,7 @@ func (p *Pool) Update() (err error) { return p.error(err) } - log.Debugf("Pool[", p.Address, "]:", string(content)) + log.Debugf("Pool[%v]: %v", p.Address, string(content)) if err = json.Unmarshal(content, &p); err != nil { return p.error(err) @@ -159,6 +166,25 @@ func (p *Pool) error(err error) error { return err } +func CalculateProcessScoreboard(p Pool) (active int64, idle int64, total int64) { + active = 0 + idle = 0 + total = 0 + + for idx := range p.Processes { + switch p.Processes[idx].State { + case PoolProcessRequestActive: + active++ + case PoolProcessRequestIdle: + idle++ + default: + log.Errorf("Unknown process state '%v'", p.Processes[idx].State) + } + } + + return active, idle, active + idle +} + type timestamp time.Time // MarshalJSON customise JSON for timestamp