lib/ur: Implement crash (panic) reporting (fixes #959) (#5702)

* lib/ur: Implement crash (panic) reporting (fixes #959)

This implements a simple crash reporting method. It piggybacks on the
panic log files created by the monitor process, picking these up and
uploading them from the usage reporting routine.

A new config value points to the crash receiver base URL, which defaults
to "https://crash.syncthing.net/newcrash" (following the pattern of
"https://data.syncthing.net/newdata" for usage reports, but allowing us
to separate the service as required).
This commit is contained in:
Jakob Borg
2019-06-11 08:19:11 +02:00
committed by GitHub
parent 93e57bd357
commit 42ce6be9b9
19 changed files with 1925 additions and 19 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,160 @@
// Copyright (C) 2019 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package main
import (
"bytes"
"errors"
"io/ioutil"
"regexp"
"strings"
raven "github.com/getsentry/raven-go"
"github.com/maruel/panicparse/stack"
)
const reportServer = "https://crash.syncthing.net/report/"
func sendReport(dsn, path string, report []byte) error {
pkt, err := parseReport(path, report)
if err != nil {
return err
}
cli, err := raven.New(dsn)
if err != nil {
return err
}
// The client sets release and such on the packet before sending, in the
// misguided idea that it knows this better than than the packet we give
// it. So we copy the values from the packet to the client first...
cli.SetRelease(pkt.Release)
cli.SetEnvironment(pkt.Environment)
_, errC := cli.Capture(pkt, nil)
return <-errC
}
func parseReport(path string, report []byte) (*raven.Packet, error) {
parts := bytes.SplitN(report, []byte("\n"), 2)
if len(parts) != 2 {
return nil, errors.New("no first line")
}
version, err := parseVersion(string(parts[0]))
if err != nil {
return nil, err
}
report = parts[1]
foundPanic := false
var subjectLine []byte
for {
parts = bytes.SplitN(report, []byte("\n"), 2)
if len(parts) != 2 {
return nil, errors.New("no panic line found")
}
line := parts[0]
report = parts[1]
if foundPanic {
// The previous line was our "Panic at ..." header. We are now
// at the beginning of the real panic trace and this is our
// subject line.
subjectLine = line
break
} else if bytes.HasPrefix(line, []byte("Panic at")) {
foundPanic = true
}
}
r := bytes.NewReader(report)
ctx, err := stack.ParseDump(r, ioutil.Discard, false)
if err != nil {
return nil, err
}
var trace raven.Stacktrace
for _, gr := range ctx.Goroutines {
if gr.First {
trace.Frames = make([]*raven.StacktraceFrame, len(gr.Stack.Calls))
for i, sc := range gr.Stack.Calls {
trace.Frames[len(trace.Frames)-1-i] = &raven.StacktraceFrame{
Function: sc.Func.Name(),
Module: sc.Func.PkgName(),
Filename: sc.SrcPath,
Lineno: sc.Line,
}
}
break
}
}
pkt := &raven.Packet{
Message: string(subjectLine),
Platform: "go",
Release: version.tag,
Tags: raven.Tags{
raven.Tag{Key: "version", Value: version.version},
raven.Tag{Key: "tag", Value: version.tag},
raven.Tag{Key: "commit", Value: version.commit},
raven.Tag{Key: "codename", Value: version.codename},
raven.Tag{Key: "runtime", Value: version.runtime},
raven.Tag{Key: "goos", Value: version.goos},
raven.Tag{Key: "goarch", Value: version.goarch},
raven.Tag{Key: "builder", Value: version.builder},
},
Extra: raven.Extra{
"url": reportServer + path,
},
Interfaces: []raven.Interface{&trace},
}
return pkt, nil
}
// syncthing v1.1.4-rc.1+30-g6aaae618-dirty-crashrep "Erbium Earthworm" (go1.12.5 darwin-amd64) jb@kvin.kastelo.net 2019-05-23 16:08:14 UTC
var longVersionRE = regexp.MustCompile(`syncthing\s+(v[^\s]+)\s+"([^"]+)"\s\(([^\s]+)\s+([^-]+)-([^)]+)\)\s+([^\s]+)`)
type version struct {
version string // "v1.1.4-rc.1+30-g6aaae618-dirty-crashrep"
tag string // "v1.1.4-rc.1"
commit string // "6aaae618", blank when absent
codename string // "Erbium Earthworm"
runtime string // "go1.12.5"
goos string // "darwin"
goarch string // "amd64"
builder string // "jb@kvin.kastelo.net"
}
func parseVersion(line string) (version, error) {
m := longVersionRE.FindStringSubmatch(line)
if len(m) == 0 {
return version{}, errors.New("unintelligeble version string")
}
v := version{
version: m[1],
codename: m[2],
runtime: m[3],
goos: m[4],
goarch: m[5],
builder: m[6],
}
parts := strings.Split(v.version, "+")
v.tag = parts[0]
if len(parts) > 1 {
fields := strings.Split(parts[1], "-")
if len(fields) >= 2 && strings.HasPrefix(fields[1], "g") {
v.commit = fields[1][1:]
}
}
return v, nil
}

View File

@@ -0,0 +1,64 @@
// Copyright (C) 2019 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package main
import (
"fmt"
"io/ioutil"
"testing"
)
func TestParseVersion(t *testing.T) {
cases := []struct {
longVersion string
parsed version
}{
{
longVersion: `syncthing v1.1.4-rc.1+30-g6aaae618-dirty-crashrep "Erbium Earthworm" (go1.12.5 darwin-amd64) jb@kvin.kastelo.net 2019-05-23 16:08:14 UTC`,
parsed: version{
version: "v1.1.4-rc.1+30-g6aaae618-dirty-crashrep",
tag: "v1.1.4-rc.1",
commit: "6aaae618",
codename: "Erbium Earthworm",
runtime: "go1.12.5",
goos: "darwin",
goarch: "amd64",
builder: "jb@kvin.kastelo.net",
},
},
}
for _, tc := range cases {
v, err := parseVersion(tc.longVersion)
if err != nil {
t.Error(err)
continue
}
if v != tc.parsed {
t.Error(v)
}
}
}
func TestParseReport(t *testing.T) {
bs, err := ioutil.ReadFile("_testdata/panic.log")
if err != nil {
t.Fatal(err)
}
pkt, err := parseReport("1/2/345", bs)
if err != nil {
t.Fatal(err)
}
bs, err = pkt.JSON()
if err != nil {
t.Fatal(err)
}
fmt.Printf("%s\n", bs)
}

View File

@@ -0,0 +1,135 @@
// Copyright (C) 2019 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
// Command stcrashreceiver is a trivial HTTP server that allows two things:
//
// - uploading files (crash reports) named like a SHA256 hash using a PUT request
// - checking whether such file exists using a HEAD request
//
// Typically this should be deployed behind something that manages HTTPS.
package main
import (
"flag"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"path"
"path/filepath"
"strings"
)
const maxRequestSize = 1 << 20 // 1 MiB
func main() {
dir := flag.String("dir", ".", "Directory to store reports in")
dsn := flag.String("dsn", "", "Sentry DSN")
listen := flag.String("listen", ":22039", "HTTP listen address")
flag.Parse()
cr := &crashReceiver{
dir: *dir,
dsn: *dsn,
}
log.SetOutput(os.Stdout)
if err := http.ListenAndServe(*listen, cr); err != nil {
log.Fatalln("HTTP serve:", err)
}
}
type crashReceiver struct {
dir string
dsn string
}
func (r *crashReceiver) ServeHTTP(w http.ResponseWriter, req *http.Request) {
// The final path component should be a SHA256 hash in hex, so 64 hex
// characters. We don't care about case on the request but use lower
// case internally.
base := strings.ToLower(path.Base(req.URL.Path))
if len(base) != 64 {
http.Error(w, "Bad request", http.StatusBadRequest)
return
}
for _, c := range base {
if c >= 'a' && c <= 'f' {
continue
}
if c >= '0' && c <= '9' {
continue
}
http.Error(w, "Bad request", http.StatusBadRequest)
return
}
switch req.Method {
case http.MethodHead:
r.serveHead(base, w, req)
case http.MethodPut:
r.servePut(base, w, req)
default:
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
}
}
// serveHead responds to HEAD requests by checking if the named report
// already exists in the system.
func (r *crashReceiver) serveHead(base string, w http.ResponseWriter, _ *http.Request) {
path := filepath.Join(r.dirFor(base), base)
if _, err := os.Lstat(path); err != nil {
http.Error(w, "Not found", http.StatusNotFound)
}
// 200 OK
}
// servePut accepts and stores the given report.
func (r *crashReceiver) servePut(base string, w http.ResponseWriter, req *http.Request) {
path := filepath.Join(r.dirFor(base), base)
fullPath := filepath.Join(r.dir, path)
// Ensure the destination directory exists
if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil {
log.Printf("Creating directory for report %s: %v", base, err)
http.Error(w, "Internal server error", http.StatusInternalServerError)
return
}
// Read at most maxRequestSize of report data.
log.Println("Receiving report", base)
lr := io.LimitReader(req.Body, maxRequestSize)
bs, err := ioutil.ReadAll(lr)
if err != nil {
log.Println("Reading report:", err)
http.Error(w, "Internal server error", http.StatusInternalServerError)
return
}
// Create an output file
err = ioutil.WriteFile(fullPath, bs, 0644)
if err != nil {
log.Printf("Creating file for report %s: %v", base, err)
http.Error(w, "Internal server error", http.StatusInternalServerError)
return
}
// Send the report to Sentry
if r.dsn != "" {
go func() {
// There's no need for the client to have to wait for this part.
if err := sendReport(r.dsn, path, bs); err != nil {
log.Println("Failed to send report:", err)
}
}()
}
}
// 01234567890abcdef... => 01/23
func (r *crashReceiver) dirFor(base string) string {
return filepath.Join(base[0:2], base[2:4])
}

View File

@@ -0,0 +1,152 @@
// Copyright (C) 2019 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package main
import (
"bytes"
"context"
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/syncthing/syncthing/lib/sha256"
)
const (
headRequestTimeout = 10 * time.Second
putRequestTimeout = time.Minute
)
// uploadPanicLogs attempts to upload all the panic logs in the named
// directory to the crash reporting server as urlBase. Uploads are attempted
// with the newest log first.
//
// This can can block for a long time. The context can set a final deadline
// for this.
func uploadPanicLogs(ctx context.Context, urlBase, dir string) {
files, err := filepath.Glob(filepath.Join(dir, "panic-*.log"))
if err != nil {
l.Warnln("Failed to list panic logs:", err)
return
}
sort.Sort(sort.Reverse(sort.StringSlice(files)))
for _, file := range files {
if strings.Contains(file, ".reported.") {
// We've already sent this file. It'll be cleaned out at some
// point.
continue
}
if err := uploadPanicLog(ctx, urlBase, file); err != nil {
l.Warnln("Reporting crash:", err)
} else {
// Rename the log so we don't have to try to report it again. This
// succeeds, or it does not. There is no point complaining about it.
_ = os.Rename(file, strings.Replace(file, ".log", ".reported.log", 1))
}
}
}
// uploadPanicLog attempts to upload the named panic log to the crash
// reporting server at urlBase. The panic ID is constructed as the sha256 of
// the log contents. A HEAD request is made to see if the log has already
// been reported. If not, a PUT is made with the log contents.
func uploadPanicLog(ctx context.Context, urlBase, file string) error {
data, err := ioutil.ReadFile(file)
if err != nil {
return err
}
// Remove log lines, for privacy.
data = filterLogLines(data)
hash := fmt.Sprintf("%x", sha256.Sum256(data))
l.Infof("Reporting crash found in %s (report ID %s) ...\n", filepath.Base(file), hash[:8])
url := fmt.Sprintf("%s/%s", urlBase, hash)
headReq, err := http.NewRequest(http.MethodHead, url, nil)
if err != nil {
return err
}
// Set a reasonable timeout on the HEAD request
headCtx, headCancel := context.WithTimeout(ctx, headRequestTimeout)
defer headCancel()
headReq = headReq.WithContext(headCtx)
resp, err := http.DefaultClient.Do(headReq)
if err != nil {
return err
}
resp.Body.Close()
if resp.StatusCode == http.StatusOK {
// It's known, we're done
return nil
}
putReq, err := http.NewRequest(http.MethodPut, url, bytes.NewReader(data))
if err != nil {
return err
}
// Set a reasonable timeout on the PUT request
putCtx, putCancel := context.WithTimeout(ctx, putRequestTimeout)
defer putCancel()
putReq = putReq.WithContext(putCtx)
resp, err = http.DefaultClient.Do(putReq)
if err != nil {
return err
}
resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("upload: %s", resp.Status)
}
return nil
}
// filterLogLines returns the data without any log lines between the first
// line and the panic trace. This is done in-place: the original data slice
// is destroyed.
func filterLogLines(data []byte) []byte {
filtered := data[:0]
matched := false
for _, line := range bytes.Split(data, []byte("\n")) {
switch {
case !matched && bytes.HasPrefix(line, []byte("Panic ")):
// This begins the panic trace, set the matched flag and append.
matched = true
fallthrough
case len(filtered) == 0 || matched:
// This is the first line or inside the panic trace.
if len(filtered) > 0 {
// We add the newline before rather than after because
// bytes.Split sees the \n as *separator* and not line
// ender, so ir will generate a last empty line that we
// don't really want. (We want to keep blank lines in the
// middle of the trace though.)
filtered = append(filtered, '\n')
}
// Remove the device ID prefix. The "plus two" stuff is because
// the line will look like "[foo] whatever" and the end variable
// will end up pointing at the ] and we want to step over that
// and the following space.
if end := bytes.Index(line, []byte("]")); end > 1 && end < len(line)-2 && bytes.HasPrefix(line, []byte("[")) {
line = line[end+2:]
}
filtered = append(filtered, line...)
}
}
return filtered
}

View File

@@ -0,0 +1,37 @@
// Copyright (C) 2019 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package main
import (
"bytes"
"testing"
)
func TestFilterLogLines(t *testing.T) {
in := []byte(`[ABCD123] syncthing version whatever
here is more log data
and more
...
and some more
yet more
Panic detected at like right now
here is panic data
and yet more panic stuff
`)
filtered := []byte(`syncthing version whatever
Panic detected at like right now
here is panic data
and yet more panic stuff
`)
result := filterLogLines(in)
if !bytes.Equal(result, filtered) {
t.Logf("%q\n", result)
t.Error("it should have been filtered")
}
}

View File

@@ -8,6 +8,7 @@ package main
import (
"bufio"
"context"
"io"
"os"
"os/exec"
@@ -33,6 +34,8 @@ const (
loopThreshold = 60 * time.Second
logFileAutoCloseDelay = 5 * time.Second
logFileMaxOpenTime = time.Minute
panicUploadMaxWait = 30 * time.Second
panicUploadNoticeWait = 10 * time.Second
)
func monitorMain(runtimeOptions RuntimeOptions) {
@@ -72,6 +75,8 @@ func monitorMain(runtimeOptions RuntimeOptions) {
childEnv := childEnv()
first := true
for {
maybeReportPanics()
if t := time.Since(restarts[0]); t < loopThreshold {
l.Warnf("%d restarts in %v; not retrying further", countRestarts, t)
os.Exit(exitError)
@@ -173,6 +178,13 @@ func copyStderr(stderr io.Reader, dst io.Writer) {
br := bufio.NewReader(stderr)
var panicFd *os.File
defer func() {
if panicFd != nil {
_ = panicFd.Close()
maybeReportPanics()
}
}()
for {
line, err := br.ReadString('\n')
if err != nil {
@@ -430,3 +442,39 @@ func childEnv() []string {
env = append(env, "STMONITORED=yes")
return env
}
// maybeReportPanics tries to figure out if crash reporting is on or off,
// and reports any panics it can find if it's enabled. We spend at most
// panicUploadMaxWait uploading panics...
func maybeReportPanics() {
// Try to get a config to see if/where panics should be reported.
cfg, err := loadOrDefaultConfig()
if err != nil {
l.Warnln("Couldn't load config; not reporting crash")
return
}
// Bail if we're not supposed to report panics.
opts := cfg.Options()
if !opts.CREnabled {
return
}
// Set up a timeout on the whole operation.
ctx, cancel := context.WithTimeout(context.Background(), panicUploadMaxWait)
defer cancel()
// Print a notice if the upload takes a long time.
go func() {
select {
case <-ctx.Done():
return
case <-time.After(panicUploadNoticeWait):
l.Warnln("Uploading crash reports is taking a while, please wait...")
}
}()
// Report the panics.
dir := locations.GetBaseDir(locations.ConfigBaseDir)
uploadPanicLogs(ctx, opts.CRURL, dir)
}