rclone/cmd/gitannex/gitannex.go
2024-06-06 17:42:27 +01:00

614 lines
18 KiB
Go

// Package gitannex provides the "gitannex" command, which enables [git-annex]
// to communicate with rclone by implementing the [external special remote
// protocol]. The protocol is line delimited and spoken over stdin and stdout.
//
// # Milestones
//
// (Tracked in [issue #7625].)
//
// 1. ✅ Minimal support for the [external special remote protocol]. Tested on
// "local" and "drive" backends.
// 2. Add support for the ASYNC protocol extension. This may improve performance.
// 3. Support the [simple export interface]. This will enable `git-annex
// export` functionality.
// 4. Once the draft is finalized, support import/export interface.
//
// [git-annex]: https://git-annex.branchable.com/
// [external special remote protocol]: https://git-annex.branchable.com/design/external_special_remote_protocol/
// [simple export interface]: https://git-annex.branchable.com/design/external_special_remote_protocol/export_and_import_appendix/
// [issue #7625]: https://github.com/rclone/rclone/issues/7625
package gitannex
import (
"bufio"
"context"
_ "embed"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/rclone/rclone/cmd"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/cache"
"github.com/rclone/rclone/fs/operations"
"github.com/spf13/cobra"
)
// Names under which this command can be invoked.
const (
	// subcommandName is the name of the rclone subcommand.
	subcommandName string = "gitannex"

	// uniqueCommandName is the executable name (e.g. of a symlink to rclone)
	// that is treated as if the "gitannex" subcommand had been given. It is
	// also registered as an alias of the subcommand.
	uniqueCommandName string = "git-annex-remote-rclone-builtin"
)
// gitannexHelp holds the contents of gitannex.md, embedded at build time. It
// is used as the command's long help text.
//
//go:embed gitannex.md
var gitannexHelp string
// init rewrites os.Args via maybeTransformArgs, so that an invocation under
// the name "git-annex-remote-rclone-builtin" has the "gitannex" subcommand
// inserted into its arguments, and then adds this command to rclone's root
// command.
func init() {
os.Args = maybeTransformArgs(os.Args)
cmd.Root.AddCommand(command)
}
// maybeTransformArgs inserts the "gitannex" subcommand into `args` when the
// program name (the base name of `args[0]`) is
// "git-annex-remote-rclone-builtin", which happens for example when rclone is
// run through a symlink of that name.
//
// In that case a new slice is returned: the program name, then the
// subcommand, then the remaining arguments. In every other case, including an
// empty `args`, the input slice is returned unchanged.
func maybeTransformArgs(args []string) []string {
	invokedAsRemote := len(args) > 0 && filepath.Base(args[0]) == uniqueCommandName
	if !invokedAsRemote {
		return args
	}

	transformed := make([]string, len(args)+1)
	transformed[0] = args[0]
	transformed[1] = subcommandName
	copy(transformed[2:], args[1:])
	return transformed
}
// messageParser splits one message received from git-annex into its
// parameters. Parameters are separated by single spaces, except that the last
// parameter of a message may itself contain spaces. Because the number of
// parameters is only known once the first one has been read, the line is
// consumed incrementally instead of being split up front.
type messageParser struct {
	// line is the part of the message that has not been consumed yet.
	line string
}

// nextSpaceDelimitedParameter consumes and returns the text up to the next
// space, or everything that remains if there is no space. Trailing "\r" and
// "\n" characters are stripped first.
//
// It returns an error, without consuming anything, when nothing remains or
// when the remaining text starts with a space (an empty parameter).
func (m *messageParser) nextSpaceDelimitedParameter() (string, error) {
	trimmed := strings.TrimRight(m.line, "\r\n")
	m.line = trimmed

	switch idx := strings.IndexByte(trimmed, ' '); {
	case trimmed == "":
		return "", errors.New("nothing remains to parse")
	case idx < 0:
		// No separator left: this is the last parameter.
		m.line = ""
		return trimmed, nil
	case idx == 0:
		return "", fmt.Errorf("found an empty space-delimited parameter in line: %q", trimmed)
	default:
		m.line = trimmed[idx+1:]
		return trimmed[:idx], nil
	}
}

// finalParameter consumes and returns everything that remains, spaces
// included, with trailing "\r" and "\n" characters stripped. It returns the
// empty string when nothing remains.
func (m *messageParser) finalParameter() string {
	rest := strings.TrimRight(m.line, "\r\n")
	m.line = ""
	return rest
}
// configDefinition describes one configuration value that this command needs.
// The values are requested from git-annex at runtime with "GETCONFIG"
// messages.
type configDefinition struct {
	// names lists every name the config is known by. The first entry is the
	// canonical name; any further entries are synonyms.
	names []string
	// description is the human-readable explanation of the config.
	description string
	// destination is where the value obtained for this config is stored.
	destination *string
	// defaultValue, when non-nil, is used if no non-empty value is obtained.
	defaultValue *string
}

// getCanonicalName returns the first of the config's names. It panics when
// the definition has no names at all.
func (c *configDefinition) getCanonicalName() string {
	if len(c.names) == 0 {
		panic(fmt.Errorf("configDefinition must have at least one name: %v", c))
	}
	return c.names[0]
}

// fullDescription returns a single-line, human-readable description of the
// config. When the config has synonyms (names other than the canonical one),
// they are listed in front of `c.description`; otherwise the description is
// returned as is.
func (c *configDefinition) fullDescription() string {
	if len(c.names) > 1 {
		// Everything after the canonical name is a synonym.
		aliases := strings.Join(c.names[1:], ", ")
		return fmt.Sprintf("(synonyms: %s) %s", aliases, c.description)
	}
	return c.description
}
// server contains this command's current state: the streams used to talk to
// git-annex, the protocol extensions git-annex has announced, and the config
// values obtained from git-annex.
type server struct {
// reader is where messages from git-annex are read, one line at a time.
reader *bufio.Reader
// writer is where messages to git-annex are written.
writer io.Writer
// When true, the server prints a transcript of messages sent and received
// to stderr.
verbose bool
// The extension* flags record which protocol extensions git-annex listed in
// its EXTENSIONS message.
extensionInfo bool
extensionAsync bool
extensionGetGitRemoteName bool
extensionUnavailableResponse bool
// configsDone is set once queryConfigs has obtained every config value, so
// that later calls return without querying git-annex again.
configsDone bool
// The config* fields hold the values of the configs described by
// getRequiredConfigs.
configPrefix string
configRcloneRemoteName string
configRcloneLayout string
}
// sendMsg writes `msg`, followed by a newline, to the server's writer. In
// verbose mode the line is also echoed, quoted, to stderr. It panics if
// either write fails.
func (s *server) sendMsg(msg string) {
	line := msg + "\n"
	if _, err := io.WriteString(s.writer, line); err != nil {
		panic(err)
	}
	if !s.verbose {
		return
	}
	if _, err := fmt.Fprintf(os.Stderr, "server sent %q\n", line); err != nil {
		panic(fmt.Errorf("failed to write verbose message to stderr: %w", err))
	}
}
// getMsg reads the next newline-terminated line from the server's reader and
// wraps it in a messageParser.
//
// It returns (nil, nil) when reading fails without having read anything:
// git-annex closes stdin when it is done with us, so that is the normal way
// for a session to end. A line that is cut off before its newline is
// reported as an error. In verbose mode the received line is echoed, quoted,
// to stderr.
func (s *server) getMsg() (*messageParser, error) {
	line, readErr := s.reader.ReadString('\n')
	switch {
	case readErr != nil && len(line) == 0:
		// End of input; not an error.
		return nil, nil
	case readErr != nil:
		return nil, fmt.Errorf("expected message to end with newline: %q", line)
	}

	if s.verbose {
		if _, err := fmt.Fprintf(os.Stderr, "server received %q\n", line); err != nil {
			return nil, fmt.Errorf("failed to write verbose message to stderr: %w", err)
		}
	}
	return &messageParser{line: line}, nil
}
// run speaks the external special remote protocol with git-annex until
// git-annex closes stdin or a fatal error occurs.
//
// It first announces the protocol version, then reads one request per line
// and dispatches on the request's first word. It returns nil when the input
// ends, and a non-nil error as soon as a message cannot be read or parsed, an
// ERROR message is received, a handler fails, or a request is not recognized.
func (s *server) run() error {
	// The remote sends the first message.
	s.sendMsg("VERSION 1")

	for {
		message, err := s.getMsg()
		if err != nil {
			return fmt.Errorf("error receiving message: %w", err)
		}
		if message == nil {
			// Git-annex closed stdin; the session is over.
			return nil
		}

		// Keep the complete line for error reporting: parsing consumes the
		// command word, so `message.line` no longer contains it afterwards.
		rawLine := strings.TrimRight(message.line, "\r\n")

		command, err := message.nextSpaceDelimitedParameter()
		if err != nil {
			return fmt.Errorf("failed to parse command: %w", err)
		}

		switch command {
		//
		// Git-annex requires that these requests are supported.
		//
		case "INITREMOTE":
			err = s.handleInitRemote()
		case "PREPARE":
			err = s.handlePrepare()
		case "EXPORTSUPPORTED":
			// Indicate that we do not support exports.
			s.sendMsg("EXPORTSUPPORTED-FAILURE")
		case "TRANSFER":
			err = s.handleTransfer(message)
		case "CHECKPRESENT":
			err = s.handleCheckPresent(message)
		case "REMOVE":
			err = s.handleRemove(message)
		case "ERROR":
			errorMessage := message.finalParameter()
			err = fmt.Errorf("received error message from git-annex: %s", errorMessage)

		//
		// These requests are optional.
		//
		case "EXTENSIONS":
			// Git-annex just told us which protocol extensions it supports.
			// Respond with the list of extensions that we want to use (none).
			err = s.handleExtensions(message)
		case "LISTCONFIGS":
			s.handleListConfigs()
		case "GETCOST":
			// Git-annex wants to know the "cost" of using this remote. It
			// probably depends on the backend we will be using, but let's just
			// consider this an "expensive remote" per git-annex's
			// Config/Cost.hs.
			s.sendMsg("COST 200")
		case "GETAVAILABILITY":
			// Indicate that this is a cloud service.
			s.sendMsg("AVAILABILITY GLOBAL")
		case "CLAIMURL", "CHECKURL", "WHEREIS", "GETINFO":
			s.sendMsg("UNSUPPORTED-REQUEST")
		default:
			err = fmt.Errorf("received unexpected message from git-annex: %s", rawLine)
		}
		if err != nil {
			return err
		}
	}
}
// handleInitRemote handles an incoming INITREMOTE message. It performs the
// one-time setup for this remote and is meant to be safe to repeat, because
// git-annex may send the message again, e.g. when this git-annex remote is
// initialized in a different repository.
//
// After obtaining the configs it opens the root of the configured rclone
// remote, requires that the remote supports empty directories, and creates
// the prefix directory. Each failure after the config query is reported to
// git-annex with INITREMOTE-FAILURE and returned as an error; success is
// reported with INITREMOTE-SUCCESS.
func (s *server) handleInitRemote() error {
	if err := s.queryConfigs(); err != nil {
		return fmt.Errorf("failed to get configs: %w", err)
	}

	ctx := context.TODO()
	remoteRoot := fmt.Sprintf("%s:", s.configRcloneRemoteName)

	rootFs, err := cache.Get(ctx, remoteRoot)
	if err != nil {
		s.sendMsg("INITREMOTE-FAILURE failed to open root directory of rclone remote")
		return fmt.Errorf("failed to open root directory of rclone remote: %w", err)
	}

	if features := rootFs.Features(); !features.CanHaveEmptyDirectories {
		s.sendMsg("INITREMOTE-FAILURE this rclone remote does not support empty directories")
		return fmt.Errorf("rclone remote does not support empty directories")
	}

	err = operations.Mkdir(ctx, rootFs, s.configPrefix)
	if err != nil {
		s.sendMsg("INITREMOTE-FAILURE failed to mkdir")
		return fmt.Errorf("failed to mkdir: %w", err)
	}

	s.sendMsg("INITREMOTE-SUCCESS")
	return nil
}
// getRequiredConfigs returns the definitions of every config this command
// uses. Each definition's destination points at the field of `s` that
// receives the value. The rclone remote name has no default; the prefix and
// the layout do.
func (s *server) getRequiredConfigs() []configDefinition {
	prefixDefault := "git-annex-rclone"
	layoutDefault := "nodir"

	remoteName := configDefinition{
		names: []string{"rcloneremotename", "target"},
		description: "Name of the rclone remote to use. " +
			"Must match a remote known to rclone. " +
			"(Note that rclone remotes are a distinct concept from git-annex remotes.)",
		destination: &s.configRcloneRemoteName,
	}

	prefix := configDefinition{
		names: []string{"rcloneprefix", "prefix"},
		description: "Directory where rclone will write git-annex content. " +
			fmt.Sprintf("If not specified, defaults to %q. ", prefixDefault) +
			"This directory will be created on init if it does not exist.",
		destination:  &s.configPrefix,
		defaultValue: &prefixDefault,
	}

	layout := configDefinition{
		names: []string{"rclonelayout", "rclone_layout"},
		description: "Defines where, within the rcloneprefix directory, rclone will write git-annex content. " +
			fmt.Sprintf("Must be one of %v. ", allLayoutModes()) +
			fmt.Sprintf("If empty, defaults to %q.", layoutDefault),
		destination:  &s.configRcloneLayout,
		defaultValue: &layoutDefault,
	}

	return []configDefinition{remoteName, prefix, layout}
}
// queryConfigs asks git-annex for the value of every config returned by
// getRequiredConfigs and stores the results in the fields of `s`. Once it has
// succeeded, later calls return immediately.
//
// For each config it sends a "GETCONFIG" message for each of the config's
// names in turn, starting with the canonical name, and stops at the first
// non-empty "VALUE" response. If no name yields a value, the config's default
// is used.
//
// It returns an error when a response cannot be read or parsed, when
// git-annex closes stdin instead of answering, or when a config without a
// default receives no value.
func (s *server) queryConfigs() error {
	if s.configsDone {
		return nil
	}

	for _, config := range s.getRequiredConfigs() {
		var valueReceived bool

		// Try each of the config's names in sequence, starting with the
		// canonical name.
		for _, configName := range config.names {
			s.sendMsg(fmt.Sprintf("GETCONFIG %s", configName))

			message, err := s.getMsg()
			if err != nil {
				return err
			}
			if message == nil {
				// getMsg reports end of input as (nil, nil). Mid-query that
				// is an error, and `message` must not be dereferenced.
				return fmt.Errorf("git-annex closed stdin before answering GETCONFIG %s", configName)
			}

			valueKeyword, err := message.nextSpaceDelimitedParameter()
			if err != nil || valueKeyword != "VALUE" {
				return fmt.Errorf("failed to parse config value: %s %s", valueKeyword, message.line)
			}

			value := message.finalParameter()
			if value != "" {
				*config.destination = value
				valueReceived = true
				break
			}
		}

		if !valueReceived {
			if config.defaultValue == nil {
				return fmt.Errorf("did not receive a non-empty config value for %q", config.getCanonicalName())
			}
			*config.destination = *config.defaultValue
		}
	}

	s.configsDone = true
	return nil
}
// handlePrepare handles an incoming PREPARE message by making sure the
// configs have been obtained from git-annex. It replies PREPARE-SUCCESS when
// they have, and PREPARE-FAILURE (also returning an error) when they could
// not be obtained.
func (s *server) handlePrepare() error {
	err := s.queryConfigs()
	if err == nil {
		s.sendMsg("PREPARE-SUCCESS")
		return nil
	}

	s.sendMsg("PREPARE-FAILURE Error getting configs")
	return fmt.Errorf("error getting configs: %w", err)
}
// handleListConfigs answers git-annex's request for the list of settings we
// use. It sends one "CONFIG" line per entry of getRequiredConfigs, carrying
// the canonical name and the full description, and finishes with "CONFIGEND".
func (s *server) handleListConfigs() {
	configs := s.getRequiredConfigs()
	for i := range configs {
		entry := &configs[i]
		name, description := entry.getCanonicalName(), entry.fullDescription()
		s.sendMsg(fmt.Sprintf("CONFIG %s %s", name, description))
	}
	s.sendMsg("CONFIGEND")
}
// handleTransfer handles a "TRANSFER STORE|RETRIEVE Key File" request by
// copying one file between the local filesystem and the rclone remote.
//
// STORE copies File to the remote under the name Key; RETRIEVE copies the
// remote object named Key to File. Success is reported to git-annex with
// TRANSFER-SUCCESS and every failure with TRANSFER-FAILURE.
//
// It returns nil on success and also when a RETRIEVE fails only because the
// key does not exist on the remote, which is not fatal. Every other failure is
// returned as an error.
func (s *server) handleTransfer(message *messageParser) error {
	argMode, err := message.nextSpaceDelimitedParameter()
	if err != nil {
		s.sendMsg("TRANSFER-FAILURE failed to parse direction")
		return fmt.Errorf("malformed arguments for TRANSFER: %w", err)
	}
	argKey, err := message.nextSpaceDelimitedParameter()
	if err != nil {
		s.sendMsg("TRANSFER-FAILURE failed to parse key")
		return fmt.Errorf("malformed arguments for TRANSFER: %w", err)
	}

	// The protocol's failure reply has the form
	// "TRANSFER-FAILURE STORE|RETRIEVE Key ErrorMsg". Now that the mode and
	// key are known, every failure reply is built here so none of them can
	// leave a part out.
	sendFailure := func(reason string) {
		s.sendMsg(fmt.Sprintf("TRANSFER-FAILURE %s %s %s", argMode, argKey, reason))
	}

	argFile := message.finalParameter()
	if argFile == "" {
		sendFailure("failed to parse file path")
		return errors.New("failed to parse file path")
	}

	if err := s.queryConfigs(); err != nil {
		sendFailure("failed to get configs")
		return fmt.Errorf("error getting configs: %w", err)
	}

	layout := parseLayoutMode(s.configRcloneLayout)
	if layout == layoutModeUnknown {
		sendFailure("failed to parse layout mode")
		return fmt.Errorf("error parsing layout mode: %q", s.configRcloneLayout)
	}

	remoteFsString, err := buildFsString(s.queryDirhash, layout, argKey, s.configRcloneRemoteName, s.configPrefix)
	if err != nil {
		sendFailure("failed to build fs string")
		return fmt.Errorf("error building fs string: %w", err)
	}

	remoteFs, err := cache.Get(context.TODO(), remoteFsString)
	if err != nil {
		sendFailure("failed to get remote fs")
		return fmt.Errorf("failed to get remote fs: %w", err)
	}

	localDir := filepath.Dir(argFile)
	localFs, err := cache.Get(context.TODO(), localDir)
	if err != nil {
		sendFailure("failed to get local fs")
		return fmt.Errorf("failed to get local fs: %w", err)
	}

	remoteFileName := argKey
	localFileName := filepath.Base(argFile)

	switch argMode {
	case "STORE":
		err = operations.CopyFile(context.TODO(), remoteFs, localFs, remoteFileName, localFileName)
		if err != nil {
			sendFailure(fmt.Sprintf("failed to copy file: %s", err))
			return fmt.Errorf("failed to copy file: %w", err)
		}

	case "RETRIEVE":
		err = operations.CopyFile(context.TODO(), localFs, remoteFs, localFileName, remoteFileName)
		// It is non-fatal when retrieval fails because the file is missing on
		// the remote. errors.Is also recognizes the sentinel when it has been
		// wrapped.
		if errors.Is(err, fs.ErrorObjectNotFound) {
			sendFailure("not found")
			return nil
		}
		if err != nil {
			sendFailure(fmt.Sprintf("failed to copy file: %s", err))
			return fmt.Errorf("failed to copy file: %w", err)
		}

	default:
		sendFailure("unrecognized mode")
		return fmt.Errorf("received malformed TRANSFER mode: %v", argMode)
	}

	s.sendMsg(fmt.Sprintf("TRANSFER-SUCCESS %s %s", argMode, argKey))
	return nil
}
// handleCheckPresent handles a "CHECKPRESENT Key" request by looking the key
// up on the rclone remote.
//
// It replies CHECKPRESENT-SUCCESS when the object exists and
// CHECKPRESENT-FAILURE when the remote reports that it does not exist. In the
// git-annex protocol FAILURE means the key was positively verified to be
// absent, so every situation in which presence could not be determined is
// reported with CHECKPRESENT-UNKNOWN instead.
//
// It returns nil when presence was determined either way, and an error when
// the request is malformed or presence could not be determined.
func (s *server) handleCheckPresent(message *messageParser) error {
	argKey := message.finalParameter()
	if argKey == "" {
		return errors.New("failed to parse key for CHECKPRESENT")
	}

	// Reply used whenever we cannot tell whether the key is present.
	sendUnknown := func(reason string) {
		s.sendMsg(fmt.Sprintf("CHECKPRESENT-UNKNOWN %s %s", argKey, reason))
	}

	if err := s.queryConfigs(); err != nil {
		sendUnknown("failed to get configs")
		return fmt.Errorf("error getting configs: %w", err)
	}

	layout := parseLayoutMode(s.configRcloneLayout)
	if layout == layoutModeUnknown {
		sendUnknown("failed to parse layout mode")
		return fmt.Errorf("error parsing layout mode: %q", s.configRcloneLayout)
	}

	remoteFsString, err := buildFsString(s.queryDirhash, layout, argKey, s.configRcloneRemoteName, s.configPrefix)
	if err != nil {
		sendUnknown("failed to build fs string")
		return fmt.Errorf("error building fs string: %w", err)
	}

	remoteFs, err := cache.Get(context.TODO(), remoteFsString)
	if err != nil {
		sendUnknown("failed to get remote fs")
		return fmt.Errorf("error getting remote fs: %w", err)
	}

	_, err = remoteFs.NewObject(context.TODO(), argKey)
	// errors.Is also recognizes the sentinel when it has been wrapped.
	if errors.Is(err, fs.ErrorObjectNotFound) {
		s.sendMsg(fmt.Sprintf("CHECKPRESENT-FAILURE %s", argKey))
		return nil
	}
	if err != nil {
		sendUnknown("error finding file")
		return fmt.Errorf("error finding file: %w", err)
	}

	s.sendMsg(fmt.Sprintf("CHECKPRESENT-SUCCESS %s", argKey))
	return nil
}
// queryDirhash sends `msg` to git-annex and parses the "VALUE <dirhash>"
// response, returning the dirhash.
//
// It returns an error when the response cannot be read, when git-annex closes
// stdin instead of answering, when the response does not start with the
// VALUE keyword, or when no dirhash follows the keyword.
func (s *server) queryDirhash(msg string) (string, error) {
	s.sendMsg(msg)

	parser, err := s.getMsg()
	if err != nil {
		return "", err
	}
	if parser == nil {
		// getMsg reports end of input as (nil, nil). Here that is an error,
		// and `parser` must not be dereferenced.
		return "", fmt.Errorf("git-annex closed stdin before answering %q", msg)
	}

	keyword, err := parser.nextSpaceDelimitedParameter()
	if err != nil {
		return "", err
	}
	if keyword != "VALUE" {
		return "", fmt.Errorf("expected VALUE keyword, but got %q", keyword)
	}

	dirhash, err := parser.nextSpaceDelimitedParameter()
	if err != nil {
		return "", fmt.Errorf("failed to parse dirhash: %w", err)
	}
	return dirhash, nil
}
// handleRemove handles a "REMOVE Key" request by deleting the object named
// Key from the rclone remote.
//
// It replies REMOVE-SUCCESS when the object was deleted and also when it did
// not exist in the first place; every other outcome is reported with
// REMOVE-FAILURE. It returns nil in the two success cases and an error
// otherwise.
func (s *server) handleRemove(message *messageParser) error {
	argKey := message.finalParameter()
	if argKey == "" {
		return errors.New("failed to parse key for REMOVE")
	}

	// Like the other key operations, make sure the configs are loaded before
	// using them. This is a no-op once they have been obtained.
	if err := s.queryConfigs(); err != nil {
		s.sendMsg(fmt.Sprintf("REMOVE-FAILURE %s failed to get configs", argKey))
		return fmt.Errorf("error getting configs: %w", err)
	}

	layout := parseLayoutMode(s.configRcloneLayout)
	if layout == layoutModeUnknown {
		s.sendMsg(fmt.Sprintf("REMOVE-FAILURE %s", argKey))
		return fmt.Errorf("error parsing layout mode: %q", s.configRcloneLayout)
	}

	remoteFsString, err := buildFsString(s.queryDirhash, layout, argKey, s.configRcloneRemoteName, s.configPrefix)
	if err != nil {
		s.sendMsg(fmt.Sprintf("REMOVE-FAILURE %s", argKey))
		return fmt.Errorf("error building fs string: %w", err)
	}

	remoteFs, err := cache.Get(context.TODO(), remoteFsString)
	if err != nil {
		s.sendMsg(fmt.Sprintf("REMOVE-FAILURE %s", argKey))
		return fmt.Errorf("error getting remote fs: %w", err)
	}

	fileObj, err := remoteFs.NewObject(context.TODO(), argKey)
	// It is non-fatal when removal fails because the file is missing on the
	// remote.
	if errors.Is(err, fs.ErrorObjectNotFound) {
		s.sendMsg(fmt.Sprintf("REMOVE-SUCCESS %s", argKey))
		return nil
	}
	if err != nil {
		s.sendMsg(fmt.Sprintf("REMOVE-FAILURE %s error getting new fs object: %s", argKey, err))
		return fmt.Errorf("error getting new fs object: %w", err)
	}

	if err := operations.DeleteFile(context.TODO(), fileObj); err != nil {
		s.sendMsg(fmt.Sprintf("REMOVE-FAILURE %s error deleting file", argKey))
		// Wrap the cause so it is not lost to the caller.
		return fmt.Errorf("error deleting file %q: %w", argKey, err)
	}

	s.sendMsg(fmt.Sprintf("REMOVE-SUCCESS %s", argKey))
	return nil
}
// handleExtensions handles the EXTENSIONS message, in which git-annex lists
// the protocol extensions it supports. Each recognized extension is recorded
// in the matching field of `s`; unrecognized names are ignored. Reading stops
// at the first parameter that cannot be parsed, which includes the end of the
// line.
//
// The reply is an "EXTENSIONS" message that lists no extensions. The returned
// error is always nil.
func (s *server) handleExtensions(message *messageParser) error {
	knownExtensions := map[string]*bool{
		"INFO":                &s.extensionInfo,
		"ASYNC":               &s.extensionAsync,
		"GETGITREMOTENAME":    &s.extensionGetGitRemoteName,
		"UNAVAILABLERESPONSE": &s.extensionUnavailableResponse,
	}

	for {
		name, err := message.nextSpaceDelimitedParameter()
		if err != nil {
			break
		}
		if flag, known := knownExtensions[name]; known {
			*flag = true
		}
	}

	s.sendMsg("EXTENSIONS")
	return nil
}
// command is the cobra definition of the "gitannex" subcommand, which is also
// reachable under the alias "git-annex-remote-rclone-builtin". It accepts no
// arguments and runs a server that talks to git-annex over stdin and stdout.
// If the server fails, the error is reported to git-annex in an ERROR message
// and the process panics with it.
var command = &cobra.Command{
	Use:     subcommandName,
	Aliases: []string{uniqueCommandName},
	Short:   "Speaks with git-annex over stdin/stdout.",
	Long:    gitannexHelp,
	Annotations: map[string]string{
		"versionIntroduced": "v1.67.0",
	},
	Run: func(cobraCmd *cobra.Command, args []string) {
		cmd.CheckArgs(0, 0, cobraCmd, args)

		srv := &server{
			reader: bufio.NewReader(os.Stdin),
			writer: os.Stdout,
		}
		if err := srv.run(); err != nil {
			srv.sendMsg(fmt.Sprintf("ERROR %s", err.Error()))
			panic(err)
		}
	},
}