reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code for each RPC endpoint. This does not scale well.

Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib

The wire format is inspired by HTTP2, the API by net/rpc.

Frames are used for framing messages, i.e. a message is made of multiple frames which are glued together using a frame-bridging reader / writer. This roughly corresponds to HTTP2 streams, although we're happy with just one stream at any time and hence don't need flow control, etc.

Frames are typed using a header. The two most important types are 'Header' and 'Data'. The RPC protocol is built on top of this:

- Client sends a header        => multiple frames of type 'header'
- Client sends request body    => multiple frames of type 'data'
- Server reads a header        => multiple frames of type 'header'
- Server reads request body    => multiple frames of type 'data'
- Server sends response header => ...
- Server sends response body   => ...

An RPC header is serialized JSON and always has the same structure. The body is of the type specified in the header.

The RPC server and client use some semi-fancy reflection techniques to automatically infer the data type of the request/response body from the method signature of the server handler, or from the client parameters, respectively. This boils down to a special case for io.Reader: such bodies are dumped into a series of data frames as efficiently as possible. All other types are (de)serialized using encoding/json.

The RPC layer and the frame layer log some messages that proved useful during debugging. By default, they log to a no-op logger, which should not have a big impact on performance.

pprof analysis shows the implementation spends its CPU time roughly as follows:
- 60% waiting for syscalls
- 30% in memmove
- 10% ...

On an Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz, Linux 4.12, the implementation achieved ~3.6GiB/s. Future optimizations may include splice(2) / vmsplice(2) on Linux, although these do not fit well with the heavy use of io.Reader / io.Writer throughout the codebase.

The existing hack for local calls was re-implemented to fit the new interfaces of RPCServer and RPCClient. The 'R'PC method invocation is a bit slower because reflection is involved in between, but otherwise performance should be no different.

The RPC code currently does not support multipart requests and thus does not support the equivalent of an HTTP POST. The switch to the new RPC code therefore had the following fallout:

- Move request objects + constants from the rpc package to the main app code
- Sacrifice the hacky 'push = pull me' way of doing push
  -> need to further extend the RPC layer, e.g. with multipart requests or additional interfaces, to implement this properly
  -> should be done after replication is abstracted better than the current separate algorithms for doPull() and doPush()
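For illustration, here is a minimal sketch of the frame-bridging idea in Go. It is not the zrepl implementation: the names (frameBridgingWriter, FrameTypeData), the header layout (1-byte type, 4-byte big-endian length), and the 32 KiB chunk size are all assumptions invented for this example.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

// Frame types, analogous to the 'Header' and 'Data' types described above.
type FrameType uint8

const (
	FrameTypeHeader FrameType = 1
	FrameTypeData   FrameType = 2
)

const maxPayload = 32 * 1024 // arbitrary chunk size for this sketch

// frameBridgingWriter splits everything written to it into frames of a
// fixed type, so that one message may span multiple frames on the wire.
type frameBridgingWriter struct {
	out io.Writer
	t   FrameType
}

func (w *frameBridgingWriter) Write(p []byte) (n int, err error) {
	for len(p) > 0 {
		chunk := p
		if len(chunk) > maxPayload {
			chunk = p[:maxPayload]
		}
		// frame = 1-byte type, 4-byte big-endian payload length, payload
		hdr := [5]byte{byte(w.t)}
		binary.BigEndian.PutUint32(hdr[1:], uint32(len(chunk)))
		if _, err = w.out.Write(hdr[:]); err != nil {
			return n, err
		}
		if _, err = w.out.Write(chunk); err != nil {
			return n, err
		}
		n += len(chunk)
		p = p[len(chunk):]
	}
	return n, nil
}

func main() {
	var buf bytes.Buffer
	w := &frameBridgingWriter{out: &buf, t: FrameTypeData}
	// 100 KiB message -> four 'data' frames (3 full, 1 partial)
	io.Copy(w, bytes.NewReader(make([]byte, 100*1024)))
	fmt.Println("framed size:", buf.Len())
}

Because frameBridgingWriter satisfies io.Writer, any body, including an io.Reader pumped through io.Copy, is split into typed frames transparently; a matching frame-bridging reader would glue the payloads back together on the receiving side.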
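The client side of this design is visible in the diff below: every former typed stub such as remote.FilesystemRequest(fsr) becomes a generic remote.Call(endpoint, in, out). The following sketch shows how such a Call might pick a body-handling strategy from the type of out, with the io.Reader special case handed through as a raw stream. The RPCClient struct, its fields, and the use of plain streams instead of frame-bridging readers/writers are simplifying assumptions, not zrepl's actual types.

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"strings"
)

// RPCClient sketches the generic client: one Call method instead of one
// hand-written stub per endpoint.
type RPCClient struct {
	// In the real protocol these would be frame-bridging readers/writers;
	// plain streams keep the sketch self-contained.
	reqBody  io.Writer
	respBody io.Reader
}

// Call serializes the request as JSON and infers how to handle the reply
// from the type of out: an *io.Reader receives the raw response body
// (a stream of data frames), anything else is JSON-decoded into it.
func (c *RPCClient) Call(endpoint string, in, out interface{}) error {
	if err := json.NewEncoder(c.reqBody).Encode(in); err != nil {
		return err
	}
	switch o := out.(type) {
	case *io.Reader:
		*o = c.respBody // hand the body stream to the caller as-is
		return nil
	default:
		return json.NewDecoder(c.respBody).Decode(out)
	}
}

func main() {
	// Fake a response body to exercise the JSON path.
	c := &RPCClient{
		reqBody:  io.Discard,
		respBody: strings.NewReader(`["zroot/a","zroot/b"]`),
	}
	var filesystems []string
	if err := c.Call("FilesystemRequest", struct{}{}, &filesystems); err != nil {
		panic(err)
	}
	fmt.Println(filesystems) // [zroot/a zroot/b]
}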
@@ -148,24 +148,20 @@ func (a localPullACL) Filter(p *zfs.DatasetPath) (pass bool, err error) {
 	return true, nil
 }

 const LOCAL_TRANSPORT_IDENTITY string = "local"

 const DEFAULT_INITIAL_REPL_POLICY = InitialReplPolicyMostRecent

 type InitialReplPolicy string

 const (
 	InitialReplPolicyMostRecent InitialReplPolicy = "most_recent"
 	InitialReplPolicyAll        InitialReplPolicy = "all"
 )

 func jobPull(pull *Pull, log jobrun.Logger) (err error) {

 	if lt, ok := pull.From.Transport.(LocalTransport); ok {

 		lt.SetHandler(Handler{
 			Logger: log,
 			// Allow access to any dataset since we control what mapping
 			// is passed to the pull routine.
 			// All local datasets will be passed to its Map() function,
 			// but only those for which a mapping exists will actually be pulled.
 			// We can pay this small performance penalty for now.
 			PullACL: localPullACL{},
 		})
 		pull.From.Transport = lt
 		log.Printf("fixing up local transport: %#v", pull.From.Transport)
 	}

-	var remote rpc.RPCRequester
+	var remote rpc.RPCClient

 	if remote, err = pull.From.Transport.Connect(log); err != nil {
 		return
@@ -182,7 +178,7 @@ func jobPush(push *Push, log jobrun.Logger) (err error) {
 		panic("no support for local pushs")
 	}

-	var remote rpc.RPCRequester
+	var remote rpc.RPCClient
 	if remote, err = push.To.Transport.Connect(log); err != nil {
 		return err
 	}
@@ -197,27 +193,19 @@ func jobPush(push *Push, log jobrun.Logger) (err error) {
 	}
-	log.Printf("handler: %#v", handler)

-	r := rpc.PullMeRequest{
-		InitialReplPolicy: push.InitialReplPolicy,
-	}
-	log.Printf("doing PullMeRequest: %#v", r)

-	if err = remote.PullMeRequest(r, handler); err != nil {
-		log.Printf("PullMeRequest failed: %s", err)
-		return
-	}
+	panic("no support for push atm")

 	log.Printf("push job finished")
 	return

 }

-func closeRPCWithTimeout(log Logger, remote rpc.RPCRequester, timeout time.Duration, goodbye string) {
+func closeRPCWithTimeout(log Logger, remote rpc.RPCClient, timeout time.Duration, goodbye string) {
 	log.Printf("closing rpc connection")

 	ch := make(chan error)
 	go func() {
-		ch <- remote.CloseRequest(rpc.CloseRequest{goodbye})
+		ch <- remote.Close()
 		close(ch)
 	}()

@@ -231,19 +219,15 @@ func closeRPCWithTimeout(log Logger, remote rpc.RPCRequester, timeout time.Durat

 	if err != nil {
 		log.Printf("error closing connection: %s", err)
-		err = remote.ForceClose()
-		if err != nil {
-			log.Printf("error force-closing connection: %s", err)
-		}
 	}
 	return
 }

 type PullContext struct {
-	Remote            rpc.RPCRequester
+	Remote            rpc.RPCClient
 	Log               Logger
 	Mapping           DatasetMapping
-	InitialReplPolicy rpc.InitialReplPolicy
+	InitialReplPolicy InitialReplPolicy
 }

 func doPull(pull PullContext) (err error) {
@@ -252,9 +236,9 @@ func doPull(pull PullContext) (err error) {
 	log := pull.Log

 	log.Printf("requesting remote filesystem list")
-	fsr := rpc.FilesystemRequest{}
+	fsr := FilesystemRequest{}
 	var remoteFilesystems []*zfs.DatasetPath
-	if remoteFilesystems, err = remote.FilesystemRequest(fsr); err != nil {
+	if err = remote.Call("FilesystemRequest", &fsr, &remoteFilesystems); err != nil {
 		return
 	}

@@ -335,11 +319,11 @@ func doPull(pull PullContext) (err error) {
 		}

 		log("requesting remote filesystem versions")
-		var theirVersions []zfs.FilesystemVersion
-		theirVersions, err = remote.FilesystemVersionsRequest(rpc.FilesystemVersionsRequest{
+		r := FilesystemVersionsRequest{
 			Filesystem: m.Remote,
-		})
-		if err != nil {
+		}
+		var theirVersions []zfs.FilesystemVersion
+		if err = remote.Call("FilesystemVersionsRequest", &r, &theirVersions); err != nil {
 			log("error requesting remote filesystem versions: %s", err)
 			log("stopping replication for all filesystems mapped as children of %s", m.Local.ToString())
 			return false
@@ -358,7 +342,7 @@ func doPull(pull PullContext) (err error) {

 			log("performing initial sync, following policy: '%s'", pull.InitialReplPolicy)

-			if pull.InitialReplPolicy != rpc.InitialReplPolicyMostRecent {
+			if pull.InitialReplPolicy != InitialReplPolicyMostRecent {
 				panic(fmt.Sprintf("policy '%s' not implemented", pull.InitialReplPolicy))
 			}

@@ -374,7 +358,7 @@ func doPull(pull PullContext) (err error) {
 				return false
 			}

-			r := rpc.InitialTransferRequest{
+			r := InitialTransferRequest{
 				Filesystem:        m.Remote,
 				FilesystemVersion: snapsOnly[len(snapsOnly)-1],
 			}
@@ -382,7 +366,8 @@ func doPull(pull PullContext) (err error) {
 				log("requesting snapshot stream for %s", r.FilesystemVersion)

 				var stream io.Reader
-				if stream, err = remote.InitialTransferRequest(r); err != nil {
+
+				if err = remote.Call("InitialTransferRequest", &r, &stream); err != nil {
 					log("error requesting initial transfer: %s", err)
 					return false
 				}
@@ -434,13 +419,13 @@ func doPull(pull PullContext) (err error) {
 				}

 				log("requesting incremental snapshot stream")
-				r := rpc.IncrementalTransferRequest{
+				r := IncrementalTransferRequest{
 					Filesystem: m.Remote,
 					From:       from,
 					To:         to,
 				}
 				var stream io.Reader
-				if stream, err = remote.IncrementalTransferRequest(r); err != nil {
+				if err = remote.Call("IncrementalTransferRequest", &r, &stream); err != nil {
 					log("error requesting incremental snapshot stream: %s", err)
 					return false
 				}