// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package pprof writes runtime profiling data in the format expected
// by the pprof visualization tool.
//
// Profiling a Go program
//
// The first step to profiling a Go program is to enable profiling.
// Support for profiling benchmarks built with the standard testing
// package is built into go test. For example, the following command
// runs benchmarks in the current directory and writes the CPU and
// memory profiles to cpu.prof and mem.prof:
//
//	go test -cpuprofile cpu.prof -memprofile mem.prof -bench .
//
// To add equivalent profiling support to a standalone program, add
// code like the following to your main function:
//
//	var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
//	var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
//
//	func main() {
//		flag.Parse()
//		if *cpuprofile != "" {
//			f, err := os.Create(*cpuprofile)
//			if err != nil {
//				log.Fatal("could not create CPU profile: ", err)
//			}
//			defer f.Close() // error handling omitted for example
//			if err := pprof.StartCPUProfile(f); err != nil {
//				log.Fatal("could not start CPU profile: ", err)
//			}
//			defer pprof.StopCPUProfile()
//		}
//
//		// ... rest of the program ...
//
//		if *memprofile != "" {
//			f, err := os.Create(*memprofile)
//			if err != nil {
//				log.Fatal("could not create memory profile: ", err)
//			}
//			defer f.Close() // error handling omitted for example
//			runtime.GC() // get up-to-date statistics
//			if err := pprof.WriteHeapProfile(f); err != nil {
//				log.Fatal("could not write memory profile: ", err)
//			}
//		}
//	}
//
// There is also a standard HTTP interface to profiling data. Adding
// the following line will install handlers under the /debug/pprof/
// URL to download live profiles:
//
//	import _ "net/http/pprof"
//
// See the net/http/pprof package for more details.
//
// Profiles can then be visualized with the pprof tool:
//
//	go tool pprof cpu.prof
//
// There are many commands available from the pprof command line.
// Commonly used commands include "top", which prints a summary of the
// top program hot-spots, and "web", which opens an interactive graph
// of hot-spots and their call graphs. Use "help" for information on
// all pprof commands.
//
// For more information about pprof, see
// https://github.com/google/pprof/blob/master/doc/README.md.
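//
// Registered profiles can also be written programmatically at any point.
// As a minimal sketch (dumpGoroutines is a hypothetical helper, not part of
// this package), the following writes every goroutine stack to standard
// error in the legacy text format:
//
//	// dumpGoroutines is an illustrative helper only.
//	func dumpGoroutines() {
//		if p := pprof.Lookup("goroutine"); p != nil {
//			p.WriteTo(os.Stderr, 1) // debug=1: human-readable text format
//		}
//	}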
package pprof

import (
	"bufio"
	"bytes"
	"fmt"
	"internal/abi"
	"io"
	"runtime"
	"sort"
	"strings"
	"sync"
	"text/tabwriter"
	"time"
	"unsafe"
)

// BUG(rsc): Profiles are only as good as the kernel support used to generate them.
// See https://golang.org/issue/13841 for details about known problems.

// A Profile is a collection of stack traces showing the call sequences
// that led to instances of a particular event, such as allocation.
// Packages can create and maintain their own profiles; the most common
// use is for tracking resources that must be explicitly closed, such as files
// or network connections.
//
// A Profile's methods can be called from multiple goroutines simultaneously.
//
// Each Profile has a unique name. A few profiles are predefined:
//
//	goroutine    - stack traces of all current goroutines
//	heap         - a sampling of memory allocations of live objects
//	allocs       - a sampling of all past memory allocations
//	threadcreate - stack traces that led to the creation of new OS threads
//	block        - stack traces that led to blocking on synchronization primitives
//	mutex        - stack traces of holders of contended mutexes
//
// These predefined profiles maintain themselves and panic on an explicit
// Add or Remove method call.
//
// The heap profile reports statistics as of the most recently completed
// garbage collection; it elides more recent allocation to avoid skewing
// the profile away from live data and toward garbage.
// If there has been no garbage collection at all, the heap profile reports
// all known allocations. This exception helps mainly in programs running
// without garbage collection enabled, usually for debugging purposes.
//
// The heap profile tracks both the allocation sites for all live objects in
// the application memory and for all objects allocated since the program start.
// Pprof's -inuse_space, -inuse_objects, -alloc_space, and -alloc_objects
// flags select which to display, defaulting to -inuse_space (live objects,
// scaled by size).
//
// The allocs profile is the same as the heap profile but changes the default
// pprof display to -alloc_space, the total number of bytes allocated since
// the program began (including garbage-collected bytes).
//
// The CPU profile is not available as a Profile. It has a special API,
// the StartCPUProfile and StopCPUProfile functions, because it streams
// output to a writer during profiling.
type Profile struct {
	name  string
	mu    sync.Mutex
	m     map[any][]uintptr
	count func() int
	write func(io.Writer, int) error
}

// profiles records all registered profiles.
var profiles struct {
	mu sync.Mutex
	m  map[string]*Profile
}

var goroutineProfile = &Profile{
	name:  "goroutine",
	count: countGoroutine,
	write: writeGoroutine,
}

var threadcreateProfile = &Profile{
	name:  "threadcreate",
	count: countThreadCreate,
	write: writeThreadCreate,
}

var heapProfile = &Profile{
	name:  "heap",
	count: countHeap,
	write: writeHeap,
}

var allocsProfile = &Profile{
	name:  "allocs",
	count: countHeap, // identical to heap profile
	write: writeAlloc,
}

var blockProfile = &Profile{
	name:  "block",
	count: countBlock,
	write: writeBlock,
}

var mutexProfile = &Profile{
	name:  "mutex",
	count: countMutex,
	write: writeMutex,
}

func lockProfiles() {
	profiles.mu.Lock()
	if profiles.m == nil {
		// Initial built-in profiles.
		profiles.m = map[string]*Profile{
			"goroutine":    goroutineProfile,
			"threadcreate": threadcreateProfile,
			"heap":         heapProfile,
			"allocs":       allocsProfile,
			"block":        blockProfile,
			"mutex":        mutexProfile,
		}
	}
}

func unlockProfiles() {
	profiles.mu.Unlock()
}

// NewProfile creates a new profile with the given name.
// If a profile with that name already exists, NewProfile panics.
// The convention is to use an 'import/path.' prefix to create
// separate name spaces for each package.
// For compatibility with various tools that read pprof data,
// profile names should not contain spaces.
func NewProfile(name string) *Profile {
	lockProfiles()
	defer unlockProfiles()
	if name == "" {
		panic("pprof: NewProfile with empty name")
	}
	if profiles.m[name] != nil {
		panic("pprof: NewProfile name already in use: " + name)
	}
	p := &Profile{
		name: name,
		m:    map[any][]uintptr{},
	}
	profiles.m[name] = p
	return p
}

// Lookup returns the profile with the given name, or nil if no such profile exists.
func Lookup(name string) *Profile {
	lockProfiles()
	defer unlockProfiles()
	return profiles.m[name]
}

// Profiles returns a slice of all the known profiles, sorted by name.
func Profiles() []*Profile {
	lockProfiles()
	defer unlockProfiles()

	all := make([]*Profile, 0, len(profiles.m))
	for _, p := range profiles.m {
		all = append(all, p)
	}

	sort.Slice(all, func(i, j int) bool { return all[i].name < all[j].name })
	return all
}

// Name returns this profile's name, which can be passed to Lookup to reobtain the profile.
func (p *Profile) Name() string {
	return p.name
}

// Count returns the number of execution stacks currently in the profile.
func (p *Profile) Count() int {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.count != nil {
		return p.count()
	}
	return len(p.m)
}

// Add adds the current execution stack to the profile, associated with value.
// Add stores value in an internal map, so value must be suitable for use as
// a map key and will not be garbage collected until the corresponding
// call to Remove. Add panics if the profile already contains a stack for value.
//
// The skip parameter has the same meaning as runtime.Caller's skip
// and controls where the stack trace begins. Passing skip=0 begins the
// trace in the function calling Add. For example, given this
// execution stack:
//
//	Add
//	called from rpc.NewClient
//	called from mypkg.Run
//	called from main.main
//
// Passing skip=0 begins the stack trace at the call to Add inside rpc.NewClient.
// Passing skip=1 begins the stack trace at the call to NewClient inside mypkg.Run.
func (p *Profile) Add(value any, skip int) {
	if p.name == "" {
		panic("pprof: use of uninitialized Profile")
	}
	if p.write != nil {
		panic("pprof: Add called on built-in Profile " + p.name)
	}

	stk := make([]uintptr, 32)
	n := runtime.Callers(skip+1, stk[:])
	stk = stk[:n]
	if len(stk) == 0 {
		// The value for skip is too large, and there's no stack trace to record.
		stk = []uintptr{abi.FuncPCABIInternal(lostProfileEvent)}
	}

	p.mu.Lock()
	defer p.mu.Unlock()
	if p.m[value] != nil {
		panic("pprof: Profile.Add of duplicate value")
	}
	p.m[value] = stk
}

// Remove removes the execution stack associated with value from the profile.
// It is a no-op if the value is not in the profile.
func (p *Profile) Remove(value any) {
	p.mu.Lock()
	defer p.mu.Unlock()
	delete(p.m, value)
}

// WriteTo writes a pprof-formatted snapshot of the profile to w.
// If a write to w returns an error, WriteTo returns that error.
// Otherwise, WriteTo returns nil.
//
// The debug parameter enables additional output.
// Passing debug=0 writes the gzip-compressed protocol buffer described
// in https://github.com/google/pprof/tree/master/proto#overview.
// Passing debug=1 writes the legacy text format with comments
// translating addresses to function names and line numbers, so that a
// programmer can read the profile without tools.
//
// The predefined profiles may assign meaning to other debug values;
// for example, when printing the "goroutine" profile, debug=2 means to
// print the goroutine stacks in the same form that a Go program uses
// when dying due to an unrecovered panic.
func (p *Profile) WriteTo(w io.Writer, debug int) error {
	if p.name == "" {
		panic("pprof: use of zero Profile")
	}
	if p.write != nil {
		return p.write(w, debug)
	}

	// Obtain consistent snapshot under lock; then process without lock.
	p.mu.Lock()
	all := make([][]uintptr, 0, len(p.m))
	for _, stk := range p.m {
		all = append(all, stk)
	}
	p.mu.Unlock()

	// Map order is non-deterministic; make output deterministic.
	sort.Slice(all, func(i, j int) bool {
		t, u := all[i], all[j]
		for k := 0; k < len(t) && k < len(u); k++ {
			if t[k] != u[k] {
				return t[k] < u[k]
			}
		}
		return len(t) < len(u)
	})

	return printCountProfile(w, debug, p.name, stackProfile(all))
}

type stackProfile [][]uintptr

func (x stackProfile) Len() int              { return len(x) }
func (x stackProfile) Stack(i int) []uintptr { return x[i] }
func (x stackProfile) Label(i int) *labelMap { return nil }

// A countProfile is a set of stack traces to be printed as counts
// grouped by stack trace. There are multiple implementations:
// all that matters is that we can find out how many traces there are
// and obtain each trace in turn.
type countProfile interface {
	Len() int
	Stack(i int) []uintptr
	Label(i int) *labelMap
}

// printCountCycleProfile outputs block profile records (for block or mutex profiles)
// as the pprof-proto format output. Translations from cycle count to time duration
// are done because the proto expects count and time (nanoseconds) instead of count
// and the number of cycles for block, contention profiles.
// Possible 'scaler' functions are scaleBlockProfile and scaleMutexProfile.
func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler func(int64, float64) (int64, float64), records []runtime.BlockProfileRecord) error {
	// Output profile in protobuf form.
	b := newProfileBuilder(w)
	b.pbValueType(tagProfile_PeriodType, countName, "count")
	b.pb.int64Opt(tagProfile_Period, 1)
	b.pbValueType(tagProfile_SampleType, countName, "count")
	b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds")

	cpuGHz := float64(runtime_cyclesPerSecond()) / 1e9

	values := []int64{0, 0}
	var locs []uint64
	for _, r := range records {
		count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz)
		values[0] = count
		values[1] = int64(nanosec)
		// For count profiles, all stack addresses are
		// return PCs, which is what appendLocsForStack expects.
		locs = b.appendLocsForStack(locs[:0], r.Stack())
		b.pbSample(values, locs, nil)
	}
	b.build()
	return nil
}

// printCountProfile prints a countProfile at the specified debug level.
// The profile will be in compressed proto format unless debug is nonzero.
func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
	// Build count of each stack.
	var buf bytes.Buffer
	key := func(stk []uintptr, lbls *labelMap) string {
		buf.Reset()
		fmt.Fprintf(&buf, "@")
		for _, pc := range stk {
			fmt.Fprintf(&buf, " %#x", pc)
		}
		if lbls != nil {
			buf.WriteString("\n# labels: ")
			buf.WriteString(lbls.String())
		}
		return buf.String()
	}
	count := map[string]int{}
	index := map[string]int{}
	var keys []string
	n := p.Len()
	for i := 0; i < n; i++ {
		k := key(p.Stack(i), p.Label(i))
		if count[k] == 0 {
			index[k] = i
			keys = append(keys, k)
		}
		count[k]++
	}

	sort.Sort(&keysByCount{keys, count})

	if debug > 0 {
		// Print debug profile in legacy format.
		tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
		fmt.Fprintf(tw, "%s profile: total %d\n", name, p.Len())
		for _, k := range keys {
			fmt.Fprintf(tw, "%d %s\n", count[k], k)
			printStackRecord(tw, p.Stack(index[k]), false)
		}
		return tw.Flush()
	}

	// Output profile in protobuf form.
	b := newProfileBuilder(w)
	b.pbValueType(tagProfile_PeriodType, name, "count")
	b.pb.int64Opt(tagProfile_Period, 1)
	b.pbValueType(tagProfile_SampleType, name, "count")

	values := []int64{0}
	var locs []uint64
	for _, k := range keys {
		values[0] = int64(count[k])
		// For count profiles, all stack addresses are
		// return PCs, which is what appendLocsForStack expects.
		locs = b.appendLocsForStack(locs[:0], p.Stack(index[k]))
		idx := index[k]
		var labels func()
		if p.Label(idx) != nil {
			labels = func() {
				for k, v := range *p.Label(idx) {
					b.pbLabel(tagSample_Label, k, v, 0)
				}
			}
		}
		b.pbSample(values, locs, labels)
	}
	b.build()
	return nil
}

// keysByCount sorts keys with higher counts first, breaking ties by key string order.
type keysByCount struct {
	keys  []string
	count map[string]int
}

func (x *keysByCount) Len() int      { return len(x.keys) }
func (x *keysByCount) Swap(i, j int) { x.keys[i], x.keys[j] = x.keys[j], x.keys[i] }
func (x *keysByCount) Less(i, j int) bool {
	ki, kj := x.keys[i], x.keys[j]
	ci, cj := x.count[ki], x.count[kj]
	if ci != cj {
		return ci > cj
	}
	return ki < kj
}

// printStackRecord prints the function + source line information
// for a single stack trace.
func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) {
	show := allFrames
	frames := runtime.CallersFrames(stk)
	for {
		frame, more := frames.Next()
		name := frame.Function
		if name == "" {
			show = true
			fmt.Fprintf(w, "#\t%#x\n", frame.PC)
		} else if name != "runtime.goexit" && (show || !strings.HasPrefix(name, "runtime.")) {
			// Hide runtime.goexit and any runtime functions at the beginning.
			// This is useful mainly for allocation traces.
			show = true
			fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", frame.PC, name, frame.PC-frame.Entry, frame.File, frame.Line)
		}
		if !more {
			break
		}
	}
	if !show {
		// We didn't print anything; do it again,
		// and this time include runtime functions.
		printStackRecord(w, stk, true)
		return
	}
	fmt.Fprintf(w, "\n")
}

// Interface to system profiles.

// WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0).
// It is preserved for backwards compatibility.
func WriteHeapProfile(w io.Writer) error {
	return writeHeap(w, 0)
}

// countHeap returns the number of records in the heap profile.
func countHeap() int {
	n, _ := runtime.MemProfile(nil, true)
	return n
}

// writeHeap writes the current runtime heap profile to w.
func writeHeap(w io.Writer, debug int) error {
	return writeHeapInternal(w, debug, "")
}

// writeAlloc writes the current runtime heap profile to w
// with the total allocation space as the default sample type.
func writeAlloc(w io.Writer, debug int) error {
	return writeHeapInternal(w, debug, "alloc_space")
}

func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error {
	var memStats *runtime.MemStats
	if debug != 0 {
		// Read mem stats first, so that our other allocations
		// do not appear in the statistics.
		memStats = new(runtime.MemStats)
		runtime.ReadMemStats(memStats)
	}

	// Find out how many records there are (MemProfile(nil, true)),
	// allocate that many records, and get the data.
	// There's a race—more records might be added between
	// the two calls—so allocate a few extra records for safety
	// and also try again if we're very unlucky.
	// The loop should only execute one iteration in the common case.
	var p []runtime.MemProfileRecord
	n, ok := runtime.MemProfile(nil, true)
	for {
		// Allocate room for a slightly bigger profile,
		// in case a few more entries have been added
		// since the call to MemProfile.
		p = make([]runtime.MemProfileRecord, n+50)
		n, ok = runtime.MemProfile(p, true)
		if ok {
			p = p[0:n]
			break
		}
		// Profile grew; try again.
	}

	if debug == 0 {
		return writeHeapProto(w, p, int64(runtime.MemProfileRate), defaultSampleType)
	}

	sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() })

	b := bufio.NewWriter(w)
	tw := tabwriter.NewWriter(b, 1, 8, 1, '\t', 0)
	w = tw

	var total runtime.MemProfileRecord
	for i := range p {
		r := &p[i]
		total.AllocBytes += r.AllocBytes
		total.AllocObjects += r.AllocObjects
		total.FreeBytes += r.FreeBytes
		total.FreeObjects += r.FreeObjects
	}

	// Technically the rate is MemProfileRate not 2*MemProfileRate,
	// but early versions of the C++ heap profiler reported 2*MemProfileRate,
	// so that's what pprof has come to expect.
	fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n",
		total.InUseObjects(), total.InUseBytes(),
		total.AllocObjects, total.AllocBytes,
		2*runtime.MemProfileRate)

	for i := range p {
		r := &p[i]
		fmt.Fprintf(w, "%d: %d [%d: %d] @",
			r.InUseObjects(), r.InUseBytes(), r.AllocObjects, r.AllocBytes)
		for _, pc := range r.Stack() {
			fmt.Fprintf(w, " %#x", pc)
		}
		fmt.Fprintf(w, "\n")
		printStackRecord(w, r.Stack(), false)
	}

	// Print memstats information too.
	// Pprof will ignore it, but it's useful for people.
	s := memStats
	fmt.Fprintf(w, "\n# runtime.MemStats\n")
	fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
	fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
	fmt.Fprintf(w, "# Sys = %d\n", s.Sys)
	fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups)
	fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs)
	fmt.Fprintf(w, "# Frees = %d\n", s.Frees)
	fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc)
	fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys)
	fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle)
	fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse)
	fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased)
	fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects)
	fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys)
	fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys)
	fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys)
	fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys)
	fmt.Fprintf(w, "# GCSys = %d\n", s.GCSys)
	fmt.Fprintf(w, "# OtherSys = %d\n", s.OtherSys)
	fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC)
	fmt.Fprintf(w, "# LastGC = %d\n", s.LastGC)
	fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs)
	fmt.Fprintf(w, "# PauseEnd = %d\n", s.PauseEnd)
	fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC)
	fmt.Fprintf(w, "# NumForcedGC = %d\n", s.NumForcedGC)
	fmt.Fprintf(w, "# GCCPUFraction = %v\n", s.GCCPUFraction)
	fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC)

	// Also flush out MaxRSS on supported platforms.
	addMaxRSS(w)

	tw.Flush()
	return b.Flush()
}

// countThreadCreate returns the size of the current ThreadCreateProfile.
func countThreadCreate() int {
	n, _ := runtime.ThreadCreateProfile(nil)
	return n
}

// writeThreadCreate writes the current runtime ThreadCreateProfile to w.
func writeThreadCreate(w io.Writer, debug int) error {
	// Until https://golang.org/issues/6104 is addressed, wrap
	// ThreadCreateProfile because there's no point in tracking labels when we
	// don't get any stack-traces.
	return writeRuntimeProfile(w, debug, "threadcreate", func(p []runtime.StackRecord, _ []unsafe.Pointer) (n int, ok bool) {
		return runtime.ThreadCreateProfile(p)
	})
}

// countGoroutine returns the number of goroutines.
func countGoroutine() int {
	return runtime.NumGoroutine()
}
// runtime_goroutineProfileWithLabels is defined in runtime/mprof.go
func runtime_goroutineProfileWithLabels(p []runtime.StackRecord, labels []unsafe.Pointer) (n int, ok bool)

// writeGoroutine writes the current runtime GoroutineProfile to w.
func writeGoroutine(w io.Writer, debug int) error {
	if debug >= 2 {
		return writeGoroutineStacks(w)
	}
	return writeRuntimeProfile(w, debug, "goroutine", runtime_goroutineProfileWithLabels)
}

func writeGoroutineStacks(w io.Writer) error {
	// We don't know how big the buffer needs to be to collect
	// all the goroutines. Start with 1 MB and try a few times, doubling each time.
	// Give up and use a truncated trace if 64 MB is not enough.
	buf := make([]byte, 1<<20)
	for i := 0; ; i++ {
		n := runtime.Stack(buf, true)
		if n < len(buf) {
			buf = buf[:n]
			break
		}
		if len(buf) >= 64<<20 {
			// Filled 64 MB - stop there.
			break
		}
		buf = make([]byte, 2*len(buf))
	}
	_, err := w.Write(buf)
	return err
}

func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord, []unsafe.Pointer) (int, bool)) error {
	// Find out how many records there are (fetch(nil)),
	// allocate that many records, and get the data.
	// There's a race—more records might be added between
	// the two calls—so allocate a few extra records for safety
	// and also try again if we're very unlucky.
	// The loop should only execute one iteration in the common case.
	var p []runtime.StackRecord
	var labels []unsafe.Pointer
	n, ok := fetch(nil, nil)
	for {
		// Allocate room for a slightly bigger profile,
		// in case a few more entries have been added
		// since the call to ThreadProfile.
		p = make([]runtime.StackRecord, n+10)
		labels = make([]unsafe.Pointer, n+10)
		n, ok = fetch(p, labels)
		if ok {
			p = p[0:n]
			break
		}
		// Profile grew; try again.
	}

	return printCountProfile(w, debug, name, &runtimeProfile{p, labels})
}

type runtimeProfile struct {
	stk    []runtime.StackRecord
	labels []unsafe.Pointer
}

func (p *runtimeProfile) Len() int              { return len(p.stk) }
func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack() }
func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) }

var cpu struct {
	sync.Mutex
	profiling bool
	done      chan bool
}

// StartCPUProfile enables CPU profiling for the current process.
// While profiling, the profile will be buffered and written to w.
// StartCPUProfile returns an error if profiling is already enabled.
//
// On Unix-like systems, StartCPUProfile does not work by default for
// Go code built with -buildmode=c-archive or -buildmode=c-shared.
// StartCPUProfile relies on the SIGPROF signal, but that signal will
// be delivered to the main program's SIGPROF signal handler (if any)
// not to the one used by Go. To make it work, call os/signal.Notify
// for syscall.SIGPROF, but note that doing so may break any profiling
// being done by the main program.
func StartCPUProfile(w io.Writer) error {
	// The runtime routines allow a variable profiling rate,
	// but in practice operating systems cannot trigger signals
	// at more than about 500 Hz, and our processing of the
	// signal is not cheap (mostly getting the stack trace).
	// 100 Hz is a reasonable choice: it is frequent enough to
	// produce useful data, rare enough not to bog down the
	// system, and a nice round number to make it easy to
	// convert sample counts to seconds. Instead of requiring
	// each client to specify the frequency, we hard code it.
	const hz = 100

	cpu.Lock()
	defer cpu.Unlock()
	if cpu.done == nil {
		cpu.done = make(chan bool)
	}
	// Double-check.
	if cpu.profiling {
		return fmt.Errorf("cpu profiling already in use")
	}
	cpu.profiling = true
	runtime.SetCPUProfileRate(hz)
	go profileWriter(w)
	return nil
}

// readProfile, provided by the runtime, returns the next chunk of
// binary CPU profiling stack trace data, blocking until data is available.
// If profiling is turned off and all the profile data accumulated while it was
// on has been returned, readProfile returns eof=true.
// The caller must save the returned data and tags before calling readProfile again.
func readProfile() (data []uint64, tags []unsafe.Pointer, eof bool)

func profileWriter(w io.Writer) {
	b := newProfileBuilder(w)
	var err error
	for {
		time.Sleep(100 * time.Millisecond)
		data, tags, eof := readProfile()
		if e := b.addCPUData(data, tags); e != nil && err == nil {
			err = e
		}
		if eof {
			break
		}
	}
	if err != nil {
		// The runtime should never produce an invalid or truncated profile.
		// It drops records that can't fit into its log buffers.
		panic("runtime/pprof: converting profile: " + err.Error())
	}
	b.build()
	cpu.done <- true
}

// StopCPUProfile stops the current CPU profile, if any.
// StopCPUProfile only returns after all the writes for the
// profile have completed.
func StopCPUProfile() {
	cpu.Lock()
	defer cpu.Unlock()

	if !cpu.profiling {
		return
	}
	cpu.profiling = false
	runtime.SetCPUProfileRate(0)
	<-cpu.done
}

// countBlock returns the number of records in the blocking profile.
func countBlock() int {
	n, _ := runtime.BlockProfile(nil)
	return n
}

// countMutex returns the number of records in the mutex profile.
func countMutex() int {
	n, _ := runtime.MutexProfile(nil)
	return n
}

// writeBlock writes the current blocking profile to w.
func writeBlock(w io.Writer, debug int) error {
	return writeProfileInternal(w, debug, "contention", runtime.BlockProfile, scaleBlockProfile)
}

func scaleBlockProfile(cnt int64, ns float64) (int64, float64) {
	// Do nothing.
	// The current way of block profile sampling makes it
	// hard to compute the unsampled number. The legacy block
	// profile parse doesn't attempt to scale or unsample.
	return cnt, ns
}

// writeMutex writes the current mutex profile to w.
func writeMutex(w io.Writer, debug int) error {
	return writeProfileInternal(w, debug, "mutex", runtime.MutexProfile, scaleMutexProfile)
}

// writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters.
func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]runtime.BlockProfileRecord) (int, bool), scaleProfile func(int64, float64) (int64, float64)) error {
	var p []runtime.BlockProfileRecord
	n, ok := runtimeProfile(nil)
	for {
		p = make([]runtime.BlockProfileRecord, n+50)
		n, ok = runtimeProfile(p)
		if ok {
			p = p[:n]
			break
		}
	}

	sort.Slice(p, func(i, j int) bool { return p[i].Cycles > p[j].Cycles })

	if debug <= 0 {
		return printCountCycleProfile(w, "contentions", "delay", scaleProfile, p)
	}

	b := bufio.NewWriter(w)
	tw := tabwriter.NewWriter(b, 1, 8, 1, '\t', 0)
	w = tw

	fmt.Fprintf(w, "--- %v:\n", name)
	fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond())
	if name == "mutex" {
		fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1))
	}
	for i := range p {
		r := &p[i]
		fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
		for _, pc := range r.Stack() {
			fmt.Fprintf(w, " %#x", pc)
		}
		fmt.Fprint(w, "\n")
		if debug > 0 {
			printStackRecord(w, r.Stack(), true)
		}
	}

	if tw != nil {
		tw.Flush()
	}
	return b.Flush()
}

func scaleMutexProfile(cnt int64, ns float64) (int64, float64) {
	period := runtime.SetMutexProfileFraction(-1)
	return cnt * int64(period), ns * float64(period)
}

// runtime_cyclesPerSecond is implemented by the runtime.
func runtime_cyclesPerSecond() int64
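
// The Profile type above is typically used by client packages to track
// resources that must be explicitly closed, such as files or network
// connections. As a minimal sketch (the openFile wrapper and the
// "example.com/mypkg.files" profile name are hypothetical, not part of this
// package), a package might record the stack of every open file and drop it
// again on Close:
//
//	// Illustrative only: the profile name and helpers below are made up.
//	var fileProfile = pprof.NewProfile("example.com/mypkg.files")
//
//	type trackedFile struct{ *os.File }
//
//	func openFile(name string) (*trackedFile, error) {
//		f, err := os.Open(name)
//		if err != nil {
//			return nil, err
//		}
//		tf := &trackedFile{f}
//		fileProfile.Add(tf, 1) // skip=1: begin the trace at openFile's caller
//		return tf, nil
//	}
//
//	func (tf *trackedFile) Close() error {
//		fileProfile.Remove(tf)
//		return tf.File.Close()
//	}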