2017-07-06 13:03:44 +02:00
|
|
|
package cmd
|
2017-05-20 17:08:18 +02:00
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"time"
|
2017-07-30 09:14:37 +02:00
|
|
|
|
|
|
|
"github.com/zrepl/zrepl/rpc"
|
|
|
|
"github.com/zrepl/zrepl/util"
|
|
|
|
"github.com/zrepl/zrepl/zfs"
|
2017-05-20 17:08:18 +02:00
|
|
|
)
|
|
|
|
|
2017-08-05 21:15:37 +02:00
|
|
|
type localPullACL struct{}
|
|
|
|
|
2017-08-06 13:04:29 +02:00
|
|
|
func (a localPullACL) Filter(p *zfs.DatasetPath) (pass bool, err error) {
|
2017-08-05 21:15:37 +02:00
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
const LOCAL_TRANSPORT_IDENTITY string = "local"
|
2017-07-06 13:03:44 +02:00
|
|
|
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
const DEFAULT_INITIAL_REPL_POLICY = InitialReplPolicyMostRecent
|
2017-07-06 13:03:44 +02:00
|
|
|
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
type InitialReplPolicy string
|
|
|
|
|
|
|
|
const (
|
|
|
|
InitialReplPolicyMostRecent InitialReplPolicy = "most_recent"
|
|
|
|
InitialReplPolicyAll InitialReplPolicy = "all"
|
|
|
|
)
|
|
|
|
|
|
|
|
func closeRPCWithTimeout(log Logger, remote rpc.RPCClient, timeout time.Duration, goodbye string) {
|
2017-05-20 17:08:18 +02:00
|
|
|
log.Printf("closing rpc connection")
|
|
|
|
|
|
|
|
ch := make(chan error)
|
|
|
|
go func() {
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
ch <- remote.Close()
|
2017-08-09 21:03:05 +02:00
|
|
|
close(ch)
|
2017-05-20 17:08:18 +02:00
|
|
|
}()
|
|
|
|
|
|
|
|
var err error
|
|
|
|
select {
|
|
|
|
case <-time.After(timeout):
|
|
|
|
err = fmt.Errorf("timeout exceeded (%s)", timeout)
|
|
|
|
case closeRequestErr := <-ch:
|
|
|
|
err = closeRequestErr
|
|
|
|
}
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error closing connection: %s", err)
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
type PullContext struct {
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
Remote rpc.RPCClient
|
2017-05-20 17:08:18 +02:00
|
|
|
Log Logger
|
2017-08-05 21:15:37 +02:00
|
|
|
Mapping DatasetMapping
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
InitialReplPolicy InitialReplPolicy
|
2017-05-20 17:08:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func doPull(pull PullContext) (err error) {
|
|
|
|
|
|
|
|
remote := pull.Remote
|
|
|
|
log := pull.Log
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log.Printf("requesting remote filesystem list")
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
fsr := FilesystemRequest{}
|
2017-08-06 13:04:29 +02:00
|
|
|
var remoteFilesystems []*zfs.DatasetPath
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
if err = remote.Call("FilesystemRequest", &fsr, &remoteFilesystems); err != nil {
|
2017-05-20 17:08:18 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log.Printf("map remote filesystems to local paths and determine order for per-filesystem sync")
|
2017-05-20 17:08:18 +02:00
|
|
|
type RemoteLocalMapping struct {
|
2017-08-06 13:04:29 +02:00
|
|
|
Remote *zfs.DatasetPath
|
|
|
|
Local *zfs.DatasetPath
|
2017-05-20 17:08:18 +02:00
|
|
|
}
|
|
|
|
replMapping := make(map[string]RemoteLocalMapping, len(remoteFilesystems))
|
|
|
|
localTraversal := zfs.NewDatasetPathForest()
|
2017-08-06 18:28:05 +02:00
|
|
|
for fs := range remoteFilesystems {
|
|
|
|
var err error
|
|
|
|
var localFs *zfs.DatasetPath
|
|
|
|
localFs, err = pull.Mapping.Map(remoteFilesystems[fs])
|
|
|
|
if err != nil {
|
2017-09-02 12:40:22 +02:00
|
|
|
err := fmt.Errorf("error mapping %s: %s", remoteFilesystems[fs], err)
|
|
|
|
log.Printf("%s", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if localFs == nil {
|
2017-08-06 18:28:05 +02:00
|
|
|
continue
|
2017-05-20 17:08:18 +02:00
|
|
|
}
|
2017-08-06 18:28:05 +02:00
|
|
|
log.Printf("%s => %s", remoteFilesystems[fs].ToString(), localFs.ToString())
|
|
|
|
m := RemoteLocalMapping{remoteFilesystems[fs], localFs}
|
|
|
|
replMapping[m.Local.ToString()] = m
|
|
|
|
localTraversal.Add(m.Local)
|
2017-05-20 17:08:18 +02:00
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log.Printf("build cache for already present local filesystem state")
|
2017-07-30 14:56:16 +02:00
|
|
|
localFilesystemState, err := zfs.ZFSListFilesystemState()
|
|
|
|
if err != nil {
|
2017-08-06 18:28:05 +02:00
|
|
|
log.Printf("error requesting local filesystem state: %s", err)
|
2017-07-30 14:56:16 +02:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log.Printf("start per-filesystem sync")
|
2017-05-20 17:08:18 +02:00
|
|
|
localTraversal.WalkTopDown(func(v zfs.DatasetPathVisit) bool {
|
|
|
|
|
|
|
|
if v.FilledIn {
|
2017-07-30 14:56:16 +02:00
|
|
|
if _, exists := localFilesystemState[v.Path.ToString()]; exists {
|
|
|
|
// No need to verify if this is a placeholder or not. It is sufficient
|
|
|
|
// to know we can add child filesystems to it
|
2017-05-20 17:08:18 +02:00
|
|
|
return true
|
|
|
|
}
|
2017-08-06 18:28:05 +02:00
|
|
|
log.Printf("creating placeholder filesystem %s", v.Path.ToString())
|
2017-07-30 14:56:16 +02:00
|
|
|
err = zfs.ZFSCreatePlaceholderFilesystem(v.Path)
|
|
|
|
if err != nil {
|
|
|
|
err = fmt.Errorf("aborting, cannot create placeholder filesystem %s: %s", v.Path, err)
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
2017-05-20 17:08:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
m, ok := replMapping[v.Path.ToString()]
|
|
|
|
if !ok {
|
|
|
|
panic("internal inconsistency: replMapping should contain mapping for any path that was not filled in by WalkTopDown()")
|
|
|
|
}
|
|
|
|
|
|
|
|
log := func(format string, args ...interface{}) {
|
|
|
|
log.Printf("[%s => %s]: %s", m.Remote.ToString(), m.Local.ToString(), fmt.Sprintf(format, args...))
|
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("examing local filesystem state")
|
2017-07-30 14:56:16 +02:00
|
|
|
localState, localExists := localFilesystemState[m.Local.ToString()]
|
2017-05-20 17:08:18 +02:00
|
|
|
var versions []zfs.FilesystemVersion
|
2017-07-30 14:56:16 +02:00
|
|
|
switch {
|
|
|
|
case !localExists:
|
|
|
|
log("local filesystem does not exist")
|
|
|
|
case localState.Placeholder:
|
|
|
|
log("local filesystem is marked as placeholder")
|
|
|
|
default:
|
2017-08-06 18:28:05 +02:00
|
|
|
log("local filesystem exists")
|
|
|
|
log("requesting local filesystem versions")
|
2017-06-22 21:49:14 +02:00
|
|
|
if versions, err = zfs.ZFSListFilesystemVersions(m.Local, nil); err != nil {
|
2017-08-06 18:28:05 +02:00
|
|
|
log("cannot get local filesystem versions: %s", err)
|
2017-05-20 17:08:18 +02:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("requesting remote filesystem versions")
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
r := FilesystemVersionsRequest{
|
2017-05-20 17:08:18 +02:00
|
|
|
Filesystem: m.Remote,
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
}
|
|
|
|
var theirVersions []zfs.FilesystemVersion
|
|
|
|
if err = remote.Call("FilesystemVersionsRequest", &r, &theirVersions); err != nil {
|
2017-08-06 18:28:05 +02:00
|
|
|
log("error requesting remote filesystem versions: %s", err)
|
|
|
|
log("stopping replication for all filesystems mapped as children of %s", m.Local.ToString())
|
2017-05-20 17:08:18 +02:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("computing diff between remote and local filesystem versions")
|
2017-05-20 17:08:18 +02:00
|
|
|
diff := zfs.MakeFilesystemDiff(versions, theirVersions)
|
2017-08-06 18:28:05 +02:00
|
|
|
log("%s", diff)
|
2017-05-20 17:08:18 +02:00
|
|
|
|
2017-07-30 14:56:16 +02:00
|
|
|
if localState.Placeholder && diff.Conflict != zfs.ConflictAllRight {
|
|
|
|
panic("internal inconsistency: local placeholder implies ConflictAllRight")
|
|
|
|
}
|
|
|
|
|
2017-07-08 13:13:16 +02:00
|
|
|
switch diff.Conflict {
|
|
|
|
case zfs.ConflictAllRight:
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("performing initial sync, following policy: '%s'", pull.InitialReplPolicy)
|
2017-05-20 17:08:18 +02:00
|
|
|
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
if pull.InitialReplPolicy != InitialReplPolicyMostRecent {
|
2017-08-06 18:28:05 +02:00
|
|
|
panic(fmt.Sprintf("policy '%s' not implemented", pull.InitialReplPolicy))
|
2017-05-20 17:08:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
snapsOnly := make([]zfs.FilesystemVersion, 0, len(diff.MRCAPathRight))
|
|
|
|
for s := range diff.MRCAPathRight {
|
|
|
|
if diff.MRCAPathRight[s].Type == zfs.Snapshot {
|
|
|
|
snapsOnly = append(snapsOnly, diff.MRCAPathRight[s])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(snapsOnly) < 1 {
|
|
|
|
log("cannot perform initial sync: no remote snapshots. stopping...")
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
r := InitialTransferRequest{
|
2017-05-20 17:08:18 +02:00
|
|
|
Filesystem: m.Remote,
|
|
|
|
FilesystemVersion: snapsOnly[len(snapsOnly)-1],
|
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("requesting snapshot stream for %s", r.FilesystemVersion)
|
2017-05-20 17:08:18 +02:00
|
|
|
|
|
|
|
var stream io.Reader
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
|
|
|
|
if err = remote.Call("InitialTransferRequest", &r, &stream); err != nil {
|
2017-08-06 18:28:05 +02:00
|
|
|
log("error requesting initial transfer: %s", err)
|
2017-05-20 17:08:18 +02:00
|
|
|
return false
|
|
|
|
}
|
2017-08-06 18:28:05 +02:00
|
|
|
log("received initial transfer request response")
|
2017-05-20 17:08:18 +02:00
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("invoking zfs receive")
|
2017-07-30 09:14:37 +02:00
|
|
|
watcher := util.IOProgressWatcher{Reader: stream}
|
|
|
|
watcher.KickOff(1*time.Second, func(p util.IOProgress) {
|
|
|
|
log("progress on receive operation: %v bytes received", p.TotalRX)
|
|
|
|
})
|
|
|
|
|
2017-07-30 14:56:16 +02:00
|
|
|
recvArgs := []string{"-u"}
|
|
|
|
if localState.Placeholder {
|
|
|
|
log("receive with forced rollback to replace placeholder filesystem")
|
|
|
|
recvArgs = append(recvArgs, "-F")
|
|
|
|
}
|
|
|
|
|
|
|
|
if err = zfs.ZFSRecv(m.Local, &watcher, recvArgs...); err != nil {
|
2017-08-06 18:28:05 +02:00
|
|
|
log("error receiving stream: %s", err)
|
2017-05-20 17:08:18 +02:00
|
|
|
return false
|
|
|
|
}
|
2017-08-06 18:28:05 +02:00
|
|
|
log("finished receiving stream, %v bytes total", watcher.Progress().TotalRX)
|
2017-05-20 17:08:18 +02:00
|
|
|
|
|
|
|
log("configuring properties of received filesystem")
|
|
|
|
if err = zfs.ZFSSet(m.Local, "readonly", "on"); err != nil {
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
log("finished initial transfer")
|
2017-07-08 13:13:16 +02:00
|
|
|
return true
|
2017-05-20 17:08:18 +02:00
|
|
|
|
2017-07-08 13:13:16 +02:00
|
|
|
case zfs.ConflictIncremental:
|
|
|
|
|
|
|
|
if len(diff.IncrementalPath) < 2 {
|
|
|
|
log("remote and local are in sync")
|
|
|
|
return true
|
|
|
|
}
|
2017-05-20 17:08:18 +02:00
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("following incremental path from diff")
|
2017-07-30 09:14:37 +02:00
|
|
|
var pathRx uint64
|
2017-05-20 17:08:18 +02:00
|
|
|
|
|
|
|
for i := 0; i < len(diff.IncrementalPath)-1; i++ {
|
|
|
|
|
|
|
|
from, to := diff.IncrementalPath[i], diff.IncrementalPath[i+1]
|
|
|
|
|
|
|
|
log := func(format string, args ...interface{}) {
|
2017-08-06 18:28:05 +02:00
|
|
|
log("[%v/%v][%s => %s]: %s", i+1, len(diff.IncrementalPath)-1,
|
|
|
|
from.Name, to.Name, fmt.Sprintf(format, args...))
|
2017-05-20 17:08:18 +02:00
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("requesting incremental snapshot stream")
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
r := IncrementalTransferRequest{
|
2017-05-20 17:08:18 +02:00
|
|
|
Filesystem: m.Remote,
|
|
|
|
From: from,
|
|
|
|
To: to,
|
|
|
|
}
|
|
|
|
var stream io.Reader
|
reimplement io.ReadWriteCloser based RPC mechanism
The existing ByteStreamRPC requires writing RPC stub + server code
for each RPC endpoint. Does not scale well.
Goal: adding a new RPC call should
- not require writing an RPC stub / handler
- not require modifications to the RPC lib
The wire format is inspired by HTTP2, the API by net/rpc.
Frames are used for framing messages, i.e. a message is made of multiple
frames which are glued together using a frame-bridging reader / writer.
This roughly corresponds to HTTP2 streams, although we're happy with
just one stream at any time and the resulting non-need for flow control,
etc.
Frames are typed using a header. The two most important types are
'Header' and 'Data'.
The RPC protocol is built on top of this:
- Client sends a header => multiple frames of type 'header'
- Client sends request body => mulitiple frames of type 'data'
- Server reads a header => multiple frames of type 'header'
- Server reads request body => mulitiple frames of type 'data'
- Server sends response header => ...
- Server sends response body => ...
An RPC header is serialized JSON and always the same structure.
The body is of the type specified in the header.
The RPC server and client use some semi-fancy reflection tequniques to
automatically infer the data type of the request/response body based on
the method signature of the server handler; or the client parameters,
respectively.
This boils down to a special-case for io.Reader, which are just dumped
into a series of data frames as efficiently as possible.
All other types are (de)serialized using encoding/json.
The RPC layer and Frame Layer log some arbitrary messages that proved
useful during debugging. By default, they log to a non-logger, which
should not have a big impact on performance.
pprof analysis shows the implementation spends its CPU time
60% waiting for syscalls
30% in memmove
10% ...
On a Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz CPU, Linux 4.12, the
implementation achieved ~3.6GiB/s.
Future optimization may include spice(2) / vmspice(2) on Linux, although
this doesn't fit so well with the heavy use of io.Reader / io.Writer
throughout the codebase.
The existing hackaround for local calls was re-implemented to fit the
new interface of PRCServer and RPCClient.
The 'R'PC method invocation is a bit slower because reflection is
involved inbetween, but otherwise performance should be no different.
The RPC code currently does not support multipart requests and thus does
not support the equivalent of a POST.
Thus, the switch to the new rpc code had the following fallout:
- Move request objects + constants from rpc package to main app code
- Sacrifice the hacky 'push = pull me' way of doing push
-> need to further extend RPC to support multipart requests or
something to implement this properly with additional interfaces
-> should be done after replication is abstracted better than separate
algorithms for doPull() and doPush()
2017-08-19 22:37:14 +02:00
|
|
|
if err = remote.Call("IncrementalTransferRequest", &r, &stream); err != nil {
|
2017-08-06 18:28:05 +02:00
|
|
|
log("error requesting incremental snapshot stream: %s", err)
|
2017-05-20 17:08:18 +02:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("invoking zfs receive")
|
2017-07-30 09:14:37 +02:00
|
|
|
watcher := util.IOProgressWatcher{Reader: stream}
|
|
|
|
watcher.KickOff(1*time.Second, func(p util.IOProgress) {
|
|
|
|
log("progress on receive operation: %v bytes received", p.TotalRX)
|
|
|
|
})
|
|
|
|
|
|
|
|
if err = zfs.ZFSRecv(m.Local, &watcher); err != nil {
|
2017-08-06 18:28:05 +02:00
|
|
|
log("error receiving stream: %s", err)
|
2017-05-20 17:08:18 +02:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2017-07-30 09:14:37 +02:00
|
|
|
totalRx := watcher.Progress().TotalRX
|
|
|
|
pathRx += totalRx
|
|
|
|
log("finished incremental transfer, %v bytes total", totalRx)
|
2017-05-20 17:08:18 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("finished following incremental path, %v bytes total", pathRx)
|
2017-07-08 13:13:16 +02:00
|
|
|
return true
|
|
|
|
|
|
|
|
case zfs.ConflictNoCommonAncestor:
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("remote and local filesystem have snapshots, but no common one")
|
2017-07-08 13:13:16 +02:00
|
|
|
log("perform manual replication to establish a common snapshot history")
|
2017-08-06 18:28:05 +02:00
|
|
|
log("remote versions:")
|
|
|
|
for _, v := range diff.MRCAPathRight {
|
|
|
|
log(" %s (GUID %v)", v, v.Guid)
|
|
|
|
}
|
|
|
|
log("local versions:")
|
|
|
|
for _, v := range diff.MRCAPathLeft {
|
|
|
|
log(" %s (GUID %v)", v, v.Guid)
|
|
|
|
}
|
2017-07-08 13:13:16 +02:00
|
|
|
return false
|
|
|
|
|
|
|
|
case zfs.ConflictDiverged:
|
|
|
|
|
2017-08-06 18:28:05 +02:00
|
|
|
log("remote and local filesystem share a history but have diverged")
|
2017-07-08 13:13:16 +02:00
|
|
|
log("perform manual replication or delete snapshots on the receiving" +
|
|
|
|
"side to establish an incremental replication parse")
|
2017-08-06 18:28:05 +02:00
|
|
|
log("remote-only versions:")
|
|
|
|
for _, v := range diff.MRCAPathRight {
|
|
|
|
log(" %s (GUID %v)", v, v.Guid)
|
|
|
|
}
|
|
|
|
log("local-only versions:")
|
|
|
|
for _, v := range diff.MRCAPathLeft {
|
|
|
|
log(" %s (GUID %v)", v, v.Guid)
|
|
|
|
}
|
2017-07-08 13:13:16 +02:00
|
|
|
return false
|
2017-05-20 17:08:18 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2017-07-08 13:13:16 +02:00
|
|
|
panic("implementation error: this should not be reached")
|
|
|
|
return false
|
2017-05-20 17:08:18 +02:00
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|