zrepl/zfs/versions_destroy.go
Christian Schwarz 1462c5caa5 zfs: fix batch destroy panic if all snaps are undestroyable
See https://github.com/zrepl/zrepl/pull/259#issuecomment-585334023

panic: runtime error: index out of range [0] with length 0
goroutine 14 [running]:
github.com/zrepl/zrepl/zfs.tryBatch(0xd6aa20, 0xc0000b8018, 0xc00025e0c0, 0x0, 0x6, 0xd61d80, 0x1280df8, 0xd58920, 0xc000132000)
        zrepl/zfs/versions_destroy.go:129 +0x302
github.com/zrepl/zrepl/zfs.doDestroyBatchedRec(0xd6aa20, 0xc0000b8018, 0xc000578a80, 0x6, 0x6, 0xd61d80, 0x1280df8)
        zrepl/zfs/versions_destroy.go:184 +0x4a5
github.com/zrepl/zrepl/zfs.doDestroyBatched(0xd6aa20, 0xc0000b8018, 0xc000222780, 0x6, 0x8, 0xd61d80, 0x1280df8)
        zrepl/zfs/versions_destroy.go:95 +0xc7
github.com/zrepl/zrepl/zfs.doDestroy(0xd6aa20, 0xc0000b8018, 0xc0005788d0, 0x6, 0x6, 0xd61d80, 0x1280df8)
        zrepl/zfs/versions_destroy.go:82 +0x362
github.com/zrepl/zrepl/zfs.ZFSDestroyFilesystemVersions(...)
        zrepl/zfs/versions_destroy.go:41
github.com/zrepl/zrepl/endpoint.doDestroySnapshots(0xd6aaa0, 0xc0004412c0, 0xc00057ca00, 0xc0005785a0, 0x6, 0x6, 0xb68940, 0xc5df01, 0xc000150a80)
        zrepl/endpoint/endpoint.go:785 +0x388
github.com/zrepl/zrepl/endpoint.(*Receiver).DestroySnapshots(0xc000127500, 0xd6aaa0, 0xc0004412c0, 0xc0002ca280, 0xc000150880, 0xd73ca0, 0xc00057c960)
        zrepl/endpoint/endpoint.go:751 +0xdb
github.com/zrepl/zrepl/daemon/pruner.doOneAttemptExec(0xc000429980, 0xc000429958, 0xc0001cb180)
        zrepl/daemon/pruner/pruner.go:531 +0x51f
github.com/zrepl/zrepl/daemon/pruner.doOneAttempt(0xc000429980, 0xc000429958)
        zrepl/daemon/pruner/pruner.go:486 +0x1064
github.com/zrepl/zrepl/daemon/pruner.(*Pruner).prune(0xc00011e280, 0xd6aaa0, 0xc0004412c0, 0x7f4906fff7e8, 0xc000127500, 0x7f4906fff738, 0xc0001324e0, 0xc000064420, 0x1, 0x1, ...)
        zrepl/daemon/pruner/pruner.go:214 +0x53
github.com/zrepl/zrepl/daemon/pruner.(*Pruner).Prune(...)
        zrepl/daemon/pruner/pruner.go:200
github.com/zrepl/zrepl/daemon/job.(*ActiveSide).do(0xc000268000, 0xd6a9e0, 0xc0002223c0)
        zrepl/daemon/job/active.go:482 +0x906
github.com/zrepl/zrepl/daemon/job.(*ActiveSide).Run(0xc000268000, 0xd6aaa0, 0xc000127080)
        zrepl/daemon/job/active.go:404 +0x289
github.com/zrepl/zrepl/daemon.(*jobs).start.func1(0xc000032200, 0xd73ca0, 0xc00000f2e0, 0xd6efa0, 0xc000268000, 0xd6aaa0, 0xc000126c90)
        zrepl/daemon/daemon.go:220 +0x121
created by github.com/zrepl/zrepl/daemon.(*jobs).start
        zrepl/daemon/daemon.go:216 +0x52e
2020-02-14 22:00:13 +01:00

239 lines
6.6 KiB
Go

package zfs
import (
"context"
"fmt"
"os"
"os/exec"
"sort"
"strings"
"sync"
"syscall"
"github.com/zrepl/zrepl/util/envconst"
)
// ZFSDestroyFilesystemVersion destroys one version of filesystem.
//
// The absolute dataset path is obtained from version.ToAbsPath(filesystem).
// As a safety net the path must contain an '@' or a '#' character; if it does
// not, an error is returned and ZFSDestroy is never invoked. Otherwise the
// result of ZFSDestroy for that path is returned.
func ZFSDestroyFilesystemVersion(filesystem *DatasetPath, version *FilesystemVersion) (err error) {
	absPath := version.ToAbsPath(filesystem)

	// Sanity check: only paths that carry a version separator are destroyed.
	if strings.ContainsAny(absPath, "@#") {
		return ZFSDestroy(absPath)
	}
	return fmt.Errorf("sanity check failed: no @ or # character found in %q", absPath)
}
// destroyerSingleton is the destroyer that ZFSDestroyFilesystemVersions hands
// to doDestroy.
var destroyerSingleton = destroyerImpl{}
// DestroySnapOp is a request to destroy the snapshot Filesystem@Name.
// The outcome of the request is reported by writing through ErrOut.
type DestroySnapOp struct {
	Filesystem string
	Name       string
	ErrOut     *error
}

// String renders the operation as "destroy operation <Filesystem>@<Name>".
func (o *DestroySnapOp) String() string {
	// All operands are strings, so fmt.Sprint inserts no separating spaces.
	return fmt.Sprint("destroy operation ", o.Filesystem, "@", o.Name)
}
// ZFSDestroyFilesystemVersions passes reqs to doDestroy, using
// destroyerSingleton as the destroyer and context.TODO() as the context.
// It returns nothing: doDestroy reports results through the ErrOut pointer
// of the individual requests.
func ZFSDestroyFilesystemVersions(reqs []*DestroySnapOp) {
doDestroy(context.TODO(), reqs, destroyerSingleton)
}
// setDestroySnapOpErr writes err to the ErrOut target of every operation in b.
func setDestroySnapOpErr(b []*DestroySnapOp, err error) {
	for i := range b {
		*b[i].ErrOut = err
	}
}
// destroyer is the set of operations that doDestroy and its helpers need in
// order to carry out destroy requests.
type destroyer interface {
// Destroy performs the destroy described by args. The callers in this file
// always pass exactly one argument of the form
// "<filesystem>@<snapshot>[,<snapshot>...]".
Destroy(args []string) error
// DestroySnapshotsCommaSyntaxSupported reports whether several snapshots
// can be named in a single Destroy argument, separated by commas.
DestroySnapshotsCommaSyntaxSupported() (bool, error)
}
// doDestroy validates reqs and executes the valid ones through e.
//
// A request whose Filesystem or Name is empty is rejected: an error is stored
// in its ErrOut and it is not processed any further. For the remaining
// requests, e is asked whether the comma syntax is supported:
//   - if that query fails, its error is stored in every remaining request;
//   - if it is supported, the requests are handed to doDestroyBatched;
//   - otherwise they are handed to doDestroySeq.
func doDestroy(ctx context.Context, reqs []*DestroySnapOp, e destroyer) {
	var accepted []*DestroySnapOp
	for _, op := range reqs {
		// Filesystem and Snapshot should not be empty.
		// ZFS will generally fail because those are invalid destroy arguments,
		// but we'd rather apply defensive programming here (doing destroy after all).
		switch {
		case op.Filesystem == "":
			*op.ErrOut = fmt.Errorf("Filesystem must not be an empty string")
		case op.Name == "":
			*op.ErrOut = fmt.Errorf("Name must not be an empty string")
		default:
			accepted = append(accepted, op)
		}
	}

	batchingAvailable, err := e.DestroySnapshotsCommaSyntaxSupported()
	if err != nil {
		debug("destroy: comma syntax support detection failed: %s", err)
		setDestroySnapOpErr(accepted, err)
		return
	}

	if batchingAvailable {
		doDestroyBatched(ctx, accepted, e)
		return
	}
	doDestroySeq(ctx, accepted, e)
}
// doDestroySeq destroys the requested snapshots one at a time: for every
// request it calls e.Destroy with the single argument "<Filesystem>@<Name>"
// and stores the returned error (possibly nil) in the request's ErrOut.
func doDestroySeq(ctx context.Context, reqs []*DestroySnapOp, e destroyer) {
	for i := range reqs {
		op := reqs[i]
		snapshot := fmt.Sprintf("%s@%s", op.Filesystem, op.Name)
		*op.ErrOut = e.Destroy([]string{snapshot})
	}
}
// doDestroyBatched partitions reqs with buildBatches and runs
// doDestroyBatchedRec on each resulting batch, in the order returned.
func doDestroyBatched(ctx context.Context, reqs []*DestroySnapOp, d destroyer) {
	batches := buildBatches(reqs)
	for i := range batches {
		doDestroyBatchedRec(ctx, batches[i], d)
	}
}
// buildBatches groups reqs by filesystem.
//
// It works on a copy of reqs (the caller's slice is not reordered), stably
// sorts that copy by Filesystem and then by Name, and returns one sub-slice
// per distinct Filesystem, in sorted order. For empty input it returns nil.
func buildBatches(reqs []*DestroySnapOp) [][]*DestroySnapOp {
	if len(reqs) == 0 {
		return nil
	}

	ordered := make([]*DestroySnapOp, len(reqs))
	copy(ordered, reqs)
	sort.SliceStable(ordered, func(a, b int) bool {
		// by filesystem, then snap name
		if c := strings.Compare(ordered[a].Filesystem, ordered[b].Filesystem); c != 0 {
			return c < 0
		}
		return strings.Compare(ordered[a].Name, ordered[b].Name) < 0
	})

	// group by fs: cut a batch whenever the filesystem changes or the input ends
	var groups [][]*DestroySnapOp
	start := 0
	for end := 1; end <= len(ordered); end++ {
		if end == len(ordered) || ordered[end].Filesystem != ordered[start].Filesystem {
			groups = append(groups, ordered[start:end])
			start = end
		}
	}
	return groups
}
// tryBatch destroys all snapshots of batch with a single call to d.Destroy,
// passing one argument of the form "<filesystem>@<name1>,<name2>,...", and
// returns that call's error.
//
// An empty batch is a no-op that returns nil. All operations in batch must be
// on the same Filesystem; tryBatch panics otherwise.
func tryBatch(ctx context.Context, batch []*DestroySnapOp, d destroyer) error {
	if len(batch) == 0 {
		return nil
	}

	fs := batch[0].Filesystem
	names := make([]string, 0, len(batch))
	for _, op := range batch {
		if op.Filesystem != fs {
			panic("inconsistent batch")
		}
		names = append(names, op.Name)
	}

	arg := fmt.Sprintf("%s@%s", fs, strings.Join(names, ","))
	return d.Destroy([]string{arg})
}
// doDestroyBatchedRec destroys the snapshots in fsbatch using as few Destroy
// invocations as it can and records the outcome of every operation in its
// ErrOut.
//
// fsbatch must be on the same filesystem (tryBatch panics otherwise).
//
// Strategy:
//  1. Batches of zero or one operation are handled by doDestroySeq.
//  2. Otherwise the whole batch is attempted with a single tryBatch.
//  3. If that fails with E2BIG, the batch is split in half and each half is
//     processed recursively.
//  4. If it fails with a *DestroySnapshotsError, the operations whose Name is
//     listed in its Undestroyable field are set aside and the rest is retried
//     as one batch. If the retry succeeds, only the set-aside operations are
//     run sequentially; if the retry fails, the entire batch is.
//  5. For any other error the entire batch is run sequentially.
func doDestroyBatchedRec(ctx context.Context, fsbatch []*DestroySnapOp, d destroyer) {
	if len(fsbatch) <= 1 {
		doDestroySeq(ctx, fsbatch, d)
		return
	}

	batchErr := tryBatch(ctx, fsbatch, d)
	if batchErr == nil {
		setDestroySnapOpErr(fsbatch, nil)
		return
	}

	if pe, ok := batchErr.(*os.PathError); ok && pe.Err == syscall.E2BIG {
		// see TestExcessiveArgumentsResultInE2BIG
		// try halving batch size, assuming snapshot names are roughly the same length
		debug("batch destroy: E2BIG encountered: %s", batchErr)
		mid := len(fsbatch) / 2 // len(fsbatch) >= 2 here, so both halves are non-empty
		doDestroyBatchedRec(ctx, fsbatch[:mid], d)
		doDestroyBatchedRec(ctx, fsbatch[mid:], d)
		return
	}

	// The destroys that will be tried sequentially after the "smart" error
	// handling below. Defaults to the entire batch.
	singleRun := fsbatch

	if dse, ok := batchErr.(*DestroySnapshotsError); ok {
		// Eliminate undestroyable snapshots from the batch and try it once again.
		// A set makes the membership test O(1) per operation instead of
		// rescanning dse.Undestroyable for every entry of fsbatch.
		undestroyable := make(map[string]struct{}, len(dse.Undestroyable))
		for _, name := range dse.Undestroyable {
			undestroyable[name] = struct{}{}
		}

		strippedBatch := make([]*DestroySnapOp, 0, len(fsbatch))
		remaining := make([]*DestroySnapOp, 0, len(fsbatch))
		for _, op := range fsbatch {
			if _, bad := undestroyable[op.Name]; bad {
				remaining = append(remaining, op)
			} else {
				strippedBatch = append(strippedBatch, op)
			}
		}

		// strippedBatch may be empty if every snapshot is undestroyable;
		// tryBatch treats that as a successful no-op.
		if retryErr := tryBatch(ctx, strippedBatch, d); retryErr == nil {
			setDestroySnapOpErr(strippedBatch, nil) // these ones worked
			singleRun = remaining
		}
		// If the stripped batch failed (it shouldn't, because the erroneous
		// snapshots were stripped), singleRun is still the entire batch.
	}

	doDestroySeq(ctx, singleRun, d)
}
// destroyerImpl is the destroyer implementation whose Destroy method forwards
// to ZFSDestroy.
type destroyerImpl struct{}

// Destroy forwards args[0] to ZFSDestroy and returns its result.
//
// It panics unless args holds exactly one element and that element contains
// an '@' character.
func (d destroyerImpl) Destroy(args []string) error {
	switch {
	case len(args) != 1:
		// we have no use case for this at the moment, so let's crash
		// (safer than destroying something unexpectedly)
		panic(fmt.Sprintf("unexpected number of arguments: %v", args))
	case !strings.Contains(args[0], "@"):
		// we know that we are only using this for snapshots,
		// so also sanity check for an @ in args[0]
		panic(fmt.Sprintf("sanity check: expecting '@' in call to Destroy, got %q", args[0]))
	}
	return ZFSDestroy(args[0])
}
// batchDestroyFeatureCheck holds the cached result of the feature detection
// done in destroyerImpl.DestroySnapshotsCommaSyntaxSupported.
var batchDestroyFeatureCheck struct {
// once guards the detection so that it is executed a single time.
once sync.Once
// enable is the detection result returned to callers.
enable bool
// err is set when running the detection command failed with something
// other than an *exec.ExitError.
err error
}
// DestroySnapshotsCommaSyntaxSupported reports whether the comma syntax for
// destroying several snapshots at once is available.
//
// The detection runs at most once (guarded by batchDestroyFeatureCheck.once);
// later calls return the cached values. It executes ZFS_BINARY with the single
// argument "destroy" and checks whether the combined output contains the
// usage string "<filesystem|volume>@<snap>[%<snap>][,...]". That result is
// passed as the default to envconst.Bool for
// ZREPL_EXPERIMENTAL_ZFS_COMMA_SYNTAX_SUPPORTED (presumably an environment
// override — confirm against envconst).
//
// Any error from running the command that is not an *exec.ExitError is
// stored and returned as the second result.
func (d destroyerImpl) DestroySnapshotsCommaSyntaxSupported() (bool, error) {
	detect := func() {
		// "feature discovery"
		out, runErr := exec.Command(ZFS_BINARY, "destroy").CombinedOutput()

		switch runErr.(type) {
		case *exec.ExitError:
			// an exit error is not treated as a detection failure
		default:
			debug("destroy feature check failed: %T %s", runErr, runErr)
			batchDestroyFeatureCheck.err = runErr
		}

		usageShowsCommaSyntax := strings.Contains(string(out), "<filesystem|volume>@<snap>[%<snap>][,...]")
		batchDestroyFeatureCheck.enable = envconst.Bool("ZREPL_EXPERIMENTAL_ZFS_COMMA_SYNTAX_SUPPORTED", usageShowsCommaSyntax)
		debug("destroy feature check complete %#v", &batchDestroyFeatureCheck)
	}

	batchDestroyFeatureCheck.once.Do(detect)
	return batchDestroyFeatureCheck.enable, batchDestroyFeatureCheck.err
}