2023-06-23 10:50:01 +02:00
//go:build !plan9 && !solaris && !js
// +build !plan9,!solaris,!js
2018-07-13 17:21:49 +02:00
2022-08-28 13:21:57 +02:00
// Package azureblob provides an interface to the Microsoft Azure blob object storage system
2017-07-25 16:18:13 +02:00
package azureblob
import (
2018-07-13 17:21:49 +02:00
"context"
2022-11-23 17:46:21 +01:00
"crypto/md5"
2017-07-25 16:18:13 +02:00
"encoding/base64"
"encoding/hex"
2022-11-29 16:43:22 +01:00
"encoding/json"
2021-11-04 11:12:57 +01:00
"errors"
2017-07-25 16:18:13 +02:00
"fmt"
"io"
"net/http"
2018-03-24 15:01:23 +01:00
"net/url"
2022-11-29 16:43:22 +01:00
"os"
2017-07-25 16:18:13 +02:00
"path"
2023-08-16 17:59:39 +02:00
"sort"
2021-10-19 21:10:18 +02:00
"strconv"
2017-07-25 16:18:13 +02:00
"strings"
"sync"
"time"
2022-11-23 17:46:21 +01:00
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
2022-11-29 16:43:22 +01:00
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
2022-11-23 17:46:21 +01:00
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
2022-11-29 16:43:22 +01:00
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas"
2022-11-23 17:46:21 +01:00
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service"
2019-07-28 19:47:38 +02:00
"github.com/rclone/rclone/fs"
2022-05-06 21:26:05 +02:00
"github.com/rclone/rclone/fs/chunksize"
2020-01-14 18:33:35 +01:00
"github.com/rclone/rclone/fs/config"
2019-07-28 19:47:38 +02:00
"github.com/rclone/rclone/fs/config/configmap"
"github.com/rclone/rclone/fs/config/configstruct"
2022-11-30 12:45:34 +01:00
"github.com/rclone/rclone/fs/config/obscure"
2019-07-28 19:47:38 +02:00
"github.com/rclone/rclone/fs/fserrors"
"github.com/rclone/rclone/fs/fshttp"
"github.com/rclone/rclone/fs/hash"
"github.com/rclone/rclone/fs/walk"
2019-08-16 11:10:56 +02:00
"github.com/rclone/rclone/lib/bucket"
2020-01-14 18:33:35 +01:00
"github.com/rclone/rclone/lib/encoder"
2022-11-29 16:43:22 +01:00
"github.com/rclone/rclone/lib/env"
2023-08-16 17:59:39 +02:00
"github.com/rclone/rclone/lib/multipart"
2019-07-28 19:47:38 +02:00
"github.com/rclone/rclone/lib/pacer"
2017-07-25 16:18:13 +02:00
)
const (
	minSleep         = 10 * time.Millisecond // minimum pacer sleep between API calls
	maxSleep         = 10 * time.Second      // maximum pacer sleep when backing off
	decayConstant    = 1                     // bigger for slower decay, exponential
	maxListChunkSize = 5000                  // number of items to read at once

	modTimeKey   = "mtime"        // blob metadata key rclone uses for the modification time
	dirMetaKey   = "hdi_isfolder" // blob metadata key marking a directory marker object
	dirMetaValue = "true"         // value of dirMetaKey on directory marker objects

	timeFormatIn          = time.RFC3339                          // format accepted when parsing stored times
	timeFormatOut         = "2006-01-02T15:04:05.000000000Z07:00" // format written to metadata (ns precision)
	storageDefaultBaseURL = "blob.core.windows.net"               // default Azure blob endpoint suffix

	defaultChunkSize  = 4 * fs.Mebi        // default multipart upload chunk size
	defaultAccessTier = blob.AccessTier("") // FIXME AccessTierNone

	// Default storage account, key and blob endpoint for emulator support,
	// though it is a base64 key checked in here, it is publicly available secret.
	emulatorAccount      = "devstoreaccount1"
	emulatorAccountKey   = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
	emulatorBlobEndpoint = "http://127.0.0.1:10000/devstoreaccount1"
)
2020-11-30 13:09:37 +01:00
var (
	// Error returned when attempting to overwrite an archive tier blob
	// without --azureblob-archive-tier-delete set. Marked no-retry as
	// retrying cannot succeed.
	errCantUpdateArchiveTierBlobs = fserrors.NoRetryError(errors.New("can't update archive tier blob without --azureblob-archive-tier-delete"))

	// Take this when changing or reading metadata.
	//
	// It acts as global metadata lock so we don't bloat Object
	// with an extra lock that will only very rarely be contended.
	metadataMu sync.Mutex
)
2017-07-25 16:18:13 +02:00
// Register with Fs
func init ( ) {
fs . Register ( & fs . RegInfo {
Name : "azureblob" ,
Description : "Microsoft Azure Blob Storage" ,
NewFs : NewFs ,
Options : [ ] fs . Option { {
Name : "account" ,
2022-11-30 12:45:34 +01:00
Help : ` Azure Storage Account Name .
2020-12-06 06:13:51 +01:00
2022-11-30 12:45:34 +01:00
Set this to the Azure Storage Account Name in use .
2020-12-06 06:13:51 +01:00
2022-11-30 12:45:34 +01:00
Leave blank to use SAS URL or Emulator , otherwise it needs to be set .
If this is blank and if env_auth is set it will be read from the
environment variable ` + " ` AZURE_STORAGE_ACCOUNT_NAME ` " + ` if possible .
2020-12-06 06:13:51 +01:00
` ,
2023-07-06 18:55:53 +02:00
Sensitive : true ,
2022-11-14 05:12:52 +01:00
} , {
Name : "env_auth" ,
2022-11-30 12:45:34 +01:00
Help : ` Read credentials from runtime ( environment variables , CLI or MSI ) .
2022-11-14 05:12:52 +01:00
2022-11-30 12:45:34 +01:00
See the [ authentication docs ] ( / azureblob # authentication ) for full info . ` ,
2022-11-14 05:12:52 +01:00
Default : false ,
2017-07-25 16:18:13 +02:00
} , {
Name : "key" ,
2022-11-29 16:43:22 +01:00
Help : ` Storage Account Shared Key .
Leave blank to use SAS URL or Emulator . ` ,
2023-07-06 18:55:53 +02:00
Sensitive : true ,
2018-03-24 15:01:23 +01:00
} , {
Name : "sas_url" ,
2022-11-29 16:43:22 +01:00
Help : ` SAS URL for container level access only .
Leave blank if using account / key or Emulator . ` ,
2023-07-06 18:55:53 +02:00
Sensitive : true ,
2022-11-30 12:45:34 +01:00
} , {
Name : "tenant" ,
Help : ` ID of the service principal ' s tenant . Also called its directory ID .
Set this if using
- Service principal with client secret
- Service principal with certificate
- User with username and password
` ,
2023-07-06 18:55:53 +02:00
Sensitive : true ,
2022-11-30 12:45:34 +01:00
} , {
Name : "client_id" ,
Help : ` The ID of the client in use .
Set this if using
- Service principal with client secret
- Service principal with certificate
- User with username and password
` ,
2023-07-06 18:55:53 +02:00
Sensitive : true ,
2022-11-30 12:45:34 +01:00
} , {
Name : "client_secret" ,
Help : ` One of the service principal ' s client secrets
Set this if using
- Service principal with client secret
` ,
2023-07-06 18:55:53 +02:00
Sensitive : true ,
2022-11-30 12:45:34 +01:00
} , {
Name : "client_certificate_path" ,
Help : ` Path to a PEM or PKCS12 certificate file including the private key .
Set this if using
- Service principal with certificate
` ,
} , {
Name : "client_certificate_password" ,
Help : ` Password for the certificate file ( optional ) .
Optionally set this if using
- Service principal with certificate
And the certificate has a password .
` ,
IsPassword : true ,
} , {
Name : "client_send_certificate_chain" ,
Help : ` Send the certificate chain when using certificate auth .
Specifies whether an authentication request will include an x5c header
to support subject name / issuer based authentication . When set to
true , authentication requests include the x5c header .
Optionally set this if using
- Service principal with certificate
` ,
Default : false ,
Advanced : true ,
} , {
Name : "username" ,
Help : ` User name ( usually an email address )
Set this if using
- User with username and password
` ,
2023-07-06 18:55:53 +02:00
Advanced : true ,
Sensitive : true ,
2022-11-30 12:45:34 +01:00
} , {
Name : "password" ,
Help : ` The user ' s password
Set this if using
- User with username and password
` ,
IsPassword : true ,
Advanced : true ,
2022-11-29 16:43:22 +01:00
} , {
Name : "service_principal_file" ,
Help : ` Path to file containing credentials for use with a service principal .
Leave blank normally . Needed only if you want to use a service principal instead of interactive login .
$ az ad sp create - for - rbac -- name "<name>" \
-- role "Storage Blob Data Owner" \
-- scopes "/subscriptions/<subscription>/resourceGroups/<resource-group>/providers/Microsoft.Storage/storageAccounts/<storage-account>/blobServices/default/containers/<container>" \
> azure - principal . json
See [ "Create an Azure service principal" ] ( https : //docs.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli) and ["Assign an Azure role for access to blob data"](https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad-rbac-cli) pages for more details.
2022-11-30 12:45:34 +01:00
It may be more convenient to put the credentials directly into the
rclone config file under the ` + " ` client_id ` , ` tenant ` and ` client_secret ` " + `
keys instead of setting ` + " ` service_principal_file ` " + ` .
2022-11-29 16:43:22 +01:00
` ,
Advanced : true ,
2020-12-02 08:40:30 +01:00
} , {
Name : "use_msi" ,
2021-08-16 11:30:01 +02:00
Help : ` Use a managed service identity to authenticate ( only works in Azure ) .
2020-12-02 08:40:30 +01:00
When true , use a [ managed service identity ] ( https : //docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/)
to authenticate to Azure Storage instead of a SAS token or account key .
If the VM ( SS ) on which this program is running has a system - assigned identity , it will
be used by default . If the resource has no system - assigned but exactly one user - assigned identity ,
the user - assigned identity will be used by default . If the resource has multiple user - assigned
identities , the identity to use must be explicitly specified using exactly one of the msi_object_id ,
msi_client_id , or msi_mi_res_id parameters . ` ,
2022-11-29 16:43:22 +01:00
Default : false ,
Advanced : true ,
2020-12-02 08:40:30 +01:00
} , {
2023-07-06 18:55:53 +02:00
Name : "msi_object_id" ,
Help : "Object ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_client_id or msi_mi_res_id specified." ,
Advanced : true ,
Sensitive : true ,
2020-12-02 08:40:30 +01:00
} , {
2023-07-06 18:55:53 +02:00
Name : "msi_client_id" ,
Help : "Object ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_object_id or msi_mi_res_id specified." ,
Advanced : true ,
Sensitive : true ,
2020-12-02 08:40:30 +01:00
} , {
2023-07-06 18:55:53 +02:00
Name : "msi_mi_res_id" ,
Help : "Azure resource ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_client_id or msi_object_id specified." ,
Advanced : true ,
Sensitive : true ,
2019-06-27 05:46:22 +02:00
} , {
2022-11-29 16:43:22 +01:00
Name : "use_emulator" ,
Help : "Uses local storage emulator if provided as 'true'.\n\nLeave blank if using real azure storage endpoint." ,
Default : false ,
Advanced : true ,
2017-07-25 16:18:13 +02:00
} , {
2018-05-14 19:06:57 +02:00
Name : "endpoint" ,
2021-08-16 11:30:01 +02:00
Help : "Endpoint for the service.\n\nLeave blank normally." ,
2018-05-14 19:06:57 +02:00
Advanced : true ,
} , {
Name : "upload_cutoff" ,
2021-08-16 11:30:01 +02:00
Help : "Cutoff for switching to chunked upload (<= 256 MiB) (deprecated)." ,
2018-05-14 19:06:57 +02:00
Advanced : true ,
} , {
2018-10-01 19:36:15 +02:00
Name : "chunk_size" ,
2021-11-12 13:12:20 +01:00
Help : ` Upload chunk size .
2018-10-01 19:36:15 +02:00
Note that this is stored in memory and there may be up to
2021-11-12 13:04:00 +01:00
"--transfers" * "--azureblob-upload-concurrency" chunks stored at once
in memory . ` ,
2019-01-11 18:17:46 +01:00
Default : defaultChunkSize ,
2018-05-14 19:06:57 +02:00
Advanced : true ,
2021-11-12 13:04:00 +01:00
} , {
Name : "upload_concurrency" ,
Help : ` Concurrency for multipart uploads .
This is the number of chunks of the same file that are uploaded
concurrently .
If you are uploading small numbers of large files over high - speed
links and these uploads do not fully utilize your bandwidth , then
increasing this may help to speed up the transfers .
In tests , upload speed increases almost linearly with upload
concurrency . For example to fill a gigabit pipe it may be necessary to
raise this to 64. Note that this will use more memory .
Note that chunks are stored in memory and there may be up to
"--transfers" * "--azureblob-upload-concurrency" chunks stored at once
in memory . ` ,
2021-11-18 16:37:00 +01:00
Default : 16 ,
2021-11-12 13:04:00 +01:00
Advanced : true ,
2018-09-10 21:45:06 +02:00
} , {
2018-10-01 19:36:15 +02:00
Name : "list_chunk" ,
Help : ` Size of blob list .
This sets the number of blobs requested in each listing chunk . Default
is the maximum , 5000. "List blobs" requests are permitted 2 minutes
per megabyte to complete . If an operation is taking longer than 2
minutes per megabyte on average , it will time out (
[ source ] ( https : //docs.microsoft.com/en-us/rest/api/storageservices/setting-timeouts-for-blob-service-operations#exceptions-to-default-timeout-interval)
) . This can be used to limit the number of blobs items to return , to
avoid the time out . ` ,
2018-09-10 21:45:06 +02:00
Default : maxListChunkSize ,
Advanced : true ,
2018-08-19 18:53:59 +02:00
} , {
Name : "access_tier" ,
2023-10-18 09:01:11 +02:00
Help : ` Access tier of blob : hot , cool , cold or archive .
2018-10-01 19:36:15 +02:00
2023-10-18 09:01:11 +02:00
Archived blobs can be restored by setting access tier to hot , cool or
cold . Leave blank if you intend to use default access tier , which is
2018-10-01 19:36:15 +02:00
set at account level
If there is no "access tier" specified , rclone doesn ' t apply any tier .
rclone performs "Set Tier" operation on blobs while uploading , if objects
are not modified , specifying "access tier" to new one will have no effect .
If blobs are in "archive tier" at remote , trying to perform data transfer
operations from remote will not be allowed . User should first restore by
2023-10-18 09:01:11 +02:00
tiering blob to "Hot" , "Cool" or "Cold" . ` ,
2018-08-19 18:53:59 +02:00
Advanced : true ,
2020-11-30 13:09:37 +01:00
} , {
Name : "archive_tier_delete" ,
Default : false ,
Help : fmt . Sprintf ( ` Delete archive tier blobs before overwriting .
Archive tier blobs cannot be updated . So without this flag , if you
attempt to update an archive tier blob , then rclone will produce the
error :
% v
With this flag set then before rclone attempts to overwrite an archive
tier blob , it will delete the existing blob before uploading its
replacement . This has the potential for data loss if the upload fails
( unlike updating a normal blob ) and also may cost more since deleting
archive tier blobs early may be chargable .
` , errCantUpdateArchiveTierBlobs ) ,
Advanced : true ,
2020-04-23 20:47:14 +02:00
} , {
Name : "disable_checksum" ,
Help : ` Don ' t store MD5 checksum with object metadata .
Normally rclone will calculate the MD5 checksum of the input before
uploading it so it can add it to metadata on the object . This is great
for data integrity checking but can cause long delays for large files
to start uploading . ` ,
Default : false ,
Advanced : true ,
2020-04-25 19:36:18 +02:00
} , {
Name : "memory_pool_flush_time" ,
2023-08-16 17:59:39 +02:00
Default : fs . Duration ( time . Minute ) ,
2020-04-25 19:36:18 +02:00
Advanced : true ,
2023-08-16 17:59:39 +02:00
Hide : fs . OptionHideBoth ,
Help : ` How often internal memory buffer pools will be flushed. (no longer used) ` ,
2020-04-25 19:36:18 +02:00
} , {
Name : "memory_pool_use_mmap" ,
2023-08-16 17:59:39 +02:00
Default : false ,
2020-04-25 19:36:18 +02:00
Advanced : true ,
2023-08-16 17:59:39 +02:00
Hide : fs . OptionHideBoth ,
Help : ` Whether to use mmap buffers in internal memory pool. (no longer used) ` ,
2020-01-14 18:33:35 +01:00
} , {
Name : config . ConfigEncoding ,
Help : config . ConfigEncodingHelp ,
Advanced : true ,
2020-01-14 22:51:49 +01:00
Default : ( encoder . EncodeInvalidUtf8 |
encoder . EncodeSlash |
encoder . EncodeCtl |
encoder . EncodeDel |
encoder . EncodeBackSlash |
encoder . EncodeRightPeriod ) ,
2021-03-15 18:18:47 +01:00
} , {
Name : "public_access" ,
2021-08-16 11:30:01 +02:00
Help : "Public access level of a container: blob or container." ,
2022-11-23 17:46:21 +01:00
Default : "" ,
2021-03-15 18:18:47 +01:00
Examples : [ ] fs . OptionExample {
{
2022-11-23 17:46:21 +01:00
Value : "" ,
2021-08-16 11:30:01 +02:00
Help : "The container and its blobs can be accessed only with an authorized request.\nIt's a default value." ,
2021-03-15 18:18:47 +01:00
} , {
2022-11-23 17:46:21 +01:00
Value : string ( container . PublicAccessTypeBlob ) ,
2021-03-15 18:18:47 +01:00
Help : "Blob data within this container can be read via anonymous request." ,
} , {
2022-11-23 17:46:21 +01:00
Value : string ( container . PublicAccessTypeContainer ) ,
2021-03-15 18:18:47 +01:00
Help : "Allow full public read access for container and blob data." ,
} ,
} ,
Advanced : true ,
2023-04-28 18:24:19 +02:00
} , {
Name : "directory_markers" ,
Default : false ,
Advanced : true ,
Help : ` Upload an empty object with a trailing slash when a new directory is created
Empty folders are unsupported for bucket based remotes , this option
creates an empty object ending with "/" , to persist the folder .
This object also has the metadata "` + dirMetaKey + ` = ` + dirMetaValue + `" to conform to
the Microsoft standard .
` ,
2022-11-29 17:00:37 +01:00
} , {
Name : "no_check_container" ,
Help : ` If set , don ' t attempt to check the container exists or create it .
This can be useful when trying to minimise the number of transactions
rclone does if you know the container exists already .
` ,
Default : false ,
Advanced : true ,
2021-09-06 11:41:54 +02:00
} , {
Name : "no_head_object" ,
Help : ` If set, do not do HEAD before GET when getting objects. ` ,
Default : false ,
Advanced : true ,
2018-05-14 19:06:57 +02:00
} } ,
2017-07-25 16:18:13 +02:00
} )
2018-05-14 19:06:57 +02:00
}
// Options defines the configuration for this backend.
//
// Field names map 1:1 onto the option names registered in init() via
// the `config` struct tags.
type Options struct {
	Account                    string               `config:"account"`
	EnvAuth                    bool                 `config:"env_auth"`
	Key                        string               `config:"key"`
	SASURL                     string               `config:"sas_url"`
	Tenant                     string               `config:"tenant"`
	ClientID                   string               `config:"client_id"`
	ClientSecret               string               `config:"client_secret"`
	ClientCertificatePath      string               `config:"client_certificate_path"`
	ClientCertificatePassword  string               `config:"client_certificate_password"`
	ClientSendCertificateChain bool                 `config:"client_send_certificate_chain"`
	Username                   string               `config:"username"`
	Password                   string               `config:"password"`
	ServicePrincipalFile       string               `config:"service_principal_file"`
	UseMSI                     bool                 `config:"use_msi"`
	MSIObjectID                string               `config:"msi_object_id"`
	MSIClientID                string               `config:"msi_client_id"`
	MSIResourceID              string               `config:"msi_mi_res_id"`
	Endpoint                   string               `config:"endpoint"`
	ChunkSize                  fs.SizeSuffix        `config:"chunk_size"`
	UploadConcurrency          int                  `config:"upload_concurrency"`
	ListChunkSize              uint                 `config:"list_chunk"`
	AccessTier                 string               `config:"access_tier"`
	ArchiveTierDelete          bool                 `config:"archive_tier_delete"`
	UseEmulator                bool                 `config:"use_emulator"`
	DisableCheckSum            bool                 `config:"disable_checksum"`
	Enc                        encoder.MultiEncoder `config:"encoding"`
	PublicAccess               string               `config:"public_access"`
	DirectoryMarkers           bool                 `config:"directory_markers"`
	NoCheckContainer           bool                 `config:"no_check_container"`
	NoHeadObject               bool                 `config:"no_head_object"`
}
// Fs represents a remote azure server
type Fs struct {
	name          string                       // name of this remote
	root          string                       // the path we are working on if any
	opt           Options                      // parsed config options
	ci            *fs.ConfigInfo               // global config
	features      *fs.Features                 // optional features
	cntSVCcacheMu sync.Mutex                   // mutex to protect cntSVCcache
	cntSVCcache   map[string]*container.Client // reference to containerClient per container
	svc           *service.Client              // client to access azblob
	rootContainer string                       // container part of root (if any)
	rootDirectory string                       // directory part of root (if any)
	isLimited     bool                         // if limited to one container
	cache         *bucket.Cache                // cache for container creation status
	pacer         *fs.Pacer                    // To pace and retry the API calls
	uploadToken   *pacer.TokenDispenser        // control concurrency
	publicAccess  container.PublicAccessType   // Container Public Access Level
}
2020-05-20 12:39:20 +02:00
// Object describes an azure object
type Object struct {
	fs         *Fs               // what this object is part of
	remote     string            // The remote path
	modTime    time.Time         // The modified time of the object if known
	md5        string            // MD5 hash if known
	size       int64             // Size of the object
	mimeType   string            // Content-Type of the object
	accessTier blob.AccessTier   // Blob Access Tier
	meta       map[string]string // blob metadata - take metadataMu when accessing
}
// ------------------------------------------------------------

// Name of the remote (as passed into NewFs)
func (f *Fs) Name() string {
	return f.name
}
// Root of the remote (as passed into NewFs)
func (f *Fs) Root() string {
	return f.root
}
// String converts this Fs to a string
func ( f * Fs ) String ( ) string {
2019-08-16 11:10:56 +02:00
if f . rootContainer == "" {
2020-02-19 21:47:50 +01:00
return "Azure root"
2019-08-16 11:10:56 +02:00
}
if f . rootDirectory == "" {
return fmt . Sprintf ( "Azure container %s" , f . rootContainer )
2017-07-25 16:18:13 +02:00
}
2019-08-16 11:10:56 +02:00
return fmt . Sprintf ( "Azure container %s path %s" , f . rootContainer , f . rootDirectory )
2017-07-25 16:18:13 +02:00
}
// Features returns the optional features of this Fs
func (f *Fs) Features() *fs.Features {
	return f.features
}
2019-08-16 11:10:56 +02:00
// parsePath parses a remote 'url' into a root path by stripping any
// leading and trailing slashes.
func parsePath(path string) (root string) {
	root = strings.TrimFunc(path, func(r rune) bool { return r == '/' })
	return
}
2019-08-16 11:10:56 +02:00
// split returns container and containerPath from the rootRelativePath
// relative to f.root
//
// Both parts are run through the configured name encoder before being
// returned, so they are in wire format ready for the API.
func (f *Fs) split(rootRelativePath string) (containerName, containerPath string) {
	containerName, containerPath = bucket.Split(bucket.Join(f.root, rootRelativePath))
	return f.opt.Enc.FromStandardName(containerName), f.opt.Enc.FromStandardPath(containerPath)
}

// split returns container and containerPath from the object
func (o *Object) split() (container, containerPath string) {
	return o.fs.split(o.remote)
}
2018-09-11 03:55:06 +02:00
// validateAccessTier checks if azureblob supports user supplied tier
func validateAccessTier ( tier string ) bool {
2022-11-23 17:46:21 +01:00
return strings . EqualFold ( tier , string ( blob . AccessTierHot ) ) ||
strings . EqualFold ( tier , string ( blob . AccessTierCool ) ) ||
2023-10-18 09:01:11 +02:00
strings . EqualFold ( tier , string ( blob . AccessTierCold ) ) ||
2022-11-23 17:46:21 +01:00
strings . EqualFold ( tier , string ( blob . AccessTierArchive ) )
2018-09-11 03:55:06 +02:00
}
2021-03-15 18:18:47 +01:00
// validatePublicAccess checks if azureblob supports use supplied public access level
func validatePublicAccess ( publicAccess string ) bool {
switch publicAccess {
2022-11-23 17:46:21 +01:00
case "" ,
string ( container . PublicAccessTypeBlob ) ,
string ( container . PublicAccessTypeContainer ) :
2021-03-15 18:18:47 +01:00
// valid cases
return true
default :
return false
}
}
2017-07-25 16:18:13 +02:00
// retryErrorCodes is a slice of error codes that we will retry
var retryErrorCodes = []int{
	401, // Unauthorized (e.g. "Token has expired")
	408, // Request Timeout
	429, // Rate exceeded.
	500, // Get occasional 500 Internal Server Error
	503, // Service Unavailable
	504, // Gateway Time-out
}
// shouldRetry returns a boolean as to whether this resp and err
// deserve to be retried. It returns the err as a convenience
func (f *Fs) shouldRetry(ctx context.Context, err error) (bool, error) {
	// Never retry if the context has been cancelled or has timed out
	if fserrors.ContextError(ctx, &err) {
		return false, err
	}
	// FIXME interpret special errors - more to do here
	if storageErr, ok := err.(*azcore.ResponseError); ok {
		switch storageErr.ErrorCode {
		case "InvalidBlobOrBlock":
			// These errors happen sometimes in multipart uploads
			// because of block concurrency issues
			return true, err
		}
		// Retry if the HTTP status code is one of the known-retryable set
		statusCode := storageErr.StatusCode
		for _, e := range retryErrorCodes {
			if statusCode == e {
				return true, err
			}
		}
	}
	// Fall back to rclone's generic retryable-error classification
	return fserrors.ShouldRetry(err), err
}
2018-09-07 13:02:27 +02:00
func checkUploadChunkSize ( cs fs . SizeSuffix ) error {
2021-03-02 20:11:57 +01:00
const minChunkSize = fs . SizeSuffixBase
2018-09-07 13:02:27 +02:00
if cs < minChunkSize {
2021-11-04 11:12:57 +01:00
return fmt . Errorf ( "%s is less than %s" , cs , minChunkSize )
2018-09-07 13:02:27 +02:00
}
return nil
}
// setUploadChunkSize validates cs and, if acceptable, installs it as
// the new chunk size, returning the previous value.
func (f *Fs) setUploadChunkSize(cs fs.SizeSuffix) (old fs.SizeSuffix, err error) {
	if err = checkUploadChunkSize(cs); err != nil {
		// old is left at its zero value on failure
		return old, err
	}
	old = f.opt.ChunkSize
	f.opt.ChunkSize = cs
	return old, nil
}
2020-12-06 06:13:51 +01:00
type servicePrincipalCredentials struct {
AppID string ` json:"appId" `
Password string ` json:"password" `
Tenant string ` json:"tenant" `
}
2022-11-29 16:43:22 +01:00
// parseServicePrincipalCredentials unmarshals a service principal credentials JSON file as generated by az cli.
func parseServicePrincipalCredentials ( ctx context . Context , credentialsData [ ] byte ) ( * servicePrincipalCredentials , error ) {
var spCredentials servicePrincipalCredentials
if err := json . Unmarshal ( credentialsData , & spCredentials ) ; err != nil {
return nil , fmt . Errorf ( "error parsing credentials from JSON file: %w" , err )
}
// TODO: support certificate credentials
// Validate all fields present
if spCredentials . AppID == "" || spCredentials . Password == "" || spCredentials . Tenant == "" {
return nil , fmt . Errorf ( "missing fields in credentials file" )
}
return & spCredentials , nil
}
2022-11-23 17:46:21 +01:00
// setRoot changes the root of the Fs
//
// It normalises the path and records the container and directory
// components separately for later use.
func (f *Fs) setRoot(root string) {
	f.root = parsePath(root)
	f.rootContainer, f.rootDirectory = bucket.Split(f.root)
}
2022-11-23 17:46:21 +01:00
// Wrap the http.Transport to satisfy the Transporter interface
type transporter struct {
	http.RoundTripper
}

// Make a new transporter backed by rclone's configured HTTP transport
func newTransporter(ctx context.Context) transporter {
	return transporter{
		RoundTripper: fshttp.NewTransport(ctx),
	}
}

// Do sends the HTTP request and returns the HTTP response or error.
func (tr transporter) Do(req *http.Request) (*http.Response, error) {
	return tr.RoundTripper.RoundTrip(req)
}
2019-02-07 18:41:17 +01:00
// NewFs constructs an Fs from the path, container:path
2020-11-05 16:18:51 +01:00
func NewFs ( ctx context . Context , name , root string , m configmap . Mapper ) ( fs . Fs , error ) {
2018-05-14 19:06:57 +02:00
// Parse config into Options struct
opt := new ( Options )
err := configstruct . Set ( m , opt )
if err != nil {
return nil , err
2017-07-25 16:18:13 +02:00
}
2018-05-14 19:06:57 +02:00
2018-09-07 13:02:27 +02:00
err = checkUploadChunkSize ( opt . ChunkSize )
if err != nil {
2022-06-08 22:54:39 +02:00
return nil , fmt . Errorf ( "chunk size: %w" , err )
2017-07-25 16:18:13 +02:00
}
2018-09-10 21:45:06 +02:00
if opt . ListChunkSize > maxListChunkSize {
2022-06-08 22:54:39 +02:00
return nil , fmt . Errorf ( "blob list size can't be greater than %v - was %v" , maxListChunkSize , opt . ListChunkSize )
2018-09-10 21:45:06 +02:00
}
2017-07-25 16:18:13 +02:00
2018-08-19 18:53:59 +02:00
if opt . AccessTier == "" {
opt . AccessTier = string ( defaultAccessTier )
2018-09-11 03:55:06 +02:00
} else if ! validateAccessTier ( opt . AccessTier ) {
2023-10-18 09:01:11 +02:00
return nil , fmt . Errorf ( "supported access tiers are %s, %s, %s and %s" ,
string ( blob . AccessTierHot ) , string ( blob . AccessTierCool ) , string ( blob . AccessTierCold ) , string ( blob . AccessTierArchive ) )
2018-08-19 18:53:59 +02:00
}
2021-03-15 18:18:47 +01:00
if ! validatePublicAccess ( ( opt . PublicAccess ) ) {
2022-06-08 22:54:39 +02:00
return nil , fmt . Errorf ( "supported public access level are %s and %s" ,
2022-11-23 17:46:21 +01:00
string ( container . PublicAccessTypeBlob ) , string ( container . PublicAccessTypeContainer ) )
2021-03-15 18:18:47 +01:00
}
2020-11-05 12:33:32 +01:00
ci := fs . GetConfig ( ctx )
2018-11-30 13:08:22 +01:00
f := & Fs {
name : name ,
opt : * opt ,
2020-11-05 12:33:32 +01:00
ci : ci ,
pacer : fs . NewPacer ( ctx , pacer . NewS3 ( pacer . MinSleep ( minSleep ) , pacer . MaxSleep ( maxSleep ) , pacer . DecayConstant ( decayConstant ) ) ) ,
uploadToken : pacer . NewTokenDispenser ( ci . Transfers ) ,
2019-08-16 11:10:56 +02:00
cache : bucket . NewCache ( ) ,
2022-11-23 17:46:21 +01:00
cntSVCcache : make ( map [ string ] * container . Client , 1 ) ,
2018-11-30 13:08:22 +01:00
}
2022-11-23 17:46:21 +01:00
f . publicAccess = container . PublicAccessType ( opt . PublicAccess )
2019-08-16 11:10:56 +02:00
f . setRoot ( root )
2018-11-30 13:08:22 +01:00
f . features = ( & fs . Features {
2019-08-16 11:10:56 +02:00
ReadMimeType : true ,
WriteMimeType : true ,
BucketBased : true ,
BucketBasedRootOK : true ,
SetTier : true ,
GetTier : true ,
2020-11-05 17:00:40 +01:00
} ) . Fill ( ctx , f )
2023-04-28 18:24:19 +02:00
if opt . DirectoryMarkers {
f . features . CanHaveEmptyDirectories = true
fs . Debugf ( f , "Using directory markers" )
}
2018-11-30 13:08:22 +01:00
2022-11-29 16:43:22 +01:00
// Client options specifying our own transport
policyClientOptions := policy . ClientOptions {
Transport : newTransporter ( ctx ) ,
}
clientOpt := service . ClientOptions {
ClientOptions : policyClientOptions ,
2022-11-23 17:46:21 +01:00
}
2022-11-29 16:43:22 +01:00
// Here we auth by setting one of cred, sharedKeyCred or f.svc
2018-03-24 15:01:23 +01:00
var (
2022-11-23 17:46:21 +01:00
cred azcore . TokenCredential
sharedKeyCred * service . SharedKeyCredential
2018-03-24 15:01:23 +01:00
)
2022-11-29 16:43:22 +01:00
switch {
case opt . EnvAuth :
// Read account from environment if needed
if opt . Account == "" {
2022-11-30 12:45:34 +01:00
opt . Account , _ = os . LookupEnv ( "AZURE_STORAGE_ACCOUNT_NAME" )
2022-11-29 16:43:22 +01:00
}
2022-11-23 17:46:21 +01:00
// Read credentials from the environment
2022-11-29 16:43:22 +01:00
options := azidentity . DefaultAzureCredentialOptions {
ClientOptions : policyClientOptions ,
}
cred , err = azidentity . NewDefaultAzureCredential ( & options )
2018-09-07 06:43:40 +02:00
if err != nil {
2023-03-24 14:14:51 +01:00
return nil , fmt . Errorf ( "create azure environment credential failed: %w" , err )
2018-09-07 06:43:40 +02:00
}
2022-11-29 16:43:22 +01:00
case opt . UseEmulator :
2022-11-30 12:45:34 +01:00
if opt . Account == "" {
2022-11-29 16:43:22 +01:00
opt . Account = emulatorAccount
}
if opt . Key == "" {
opt . Key = emulatorAccountKey
}
2022-11-30 12:45:34 +01:00
if opt . Endpoint == "" {
2022-11-29 16:43:22 +01:00
opt . Endpoint = emulatorBlobEndpoint
}
sharedKeyCred , err = service . NewSharedKeyCredential ( opt . Account , opt . Key )
if err != nil {
return nil , fmt . Errorf ( "create new shared key credential for emulator failed: %w" , err )
}
case opt . Account != "" && opt . Key != "" :
2022-11-23 17:46:21 +01:00
sharedKeyCred , err = service . NewSharedKeyCredential ( opt . Account , opt . Key )
2018-03-24 15:01:23 +01:00
if err != nil {
2022-11-23 17:46:21 +01:00
return nil , fmt . Errorf ( "create new shared key credential failed: %w" , err )
2020-12-02 08:40:30 +01:00
}
2022-11-29 16:43:22 +01:00
case opt . SASURL != "" :
parts , err := sas . ParseURL ( opt . SASURL )
2018-03-24 15:01:23 +01:00
if err != nil {
2022-11-29 16:43:22 +01:00
return nil , fmt . Errorf ( "failed to parse SAS URL: %w" , err )
2018-03-24 15:01:23 +01:00
}
2022-11-29 16:43:22 +01:00
endpoint := opt . SASURL
containerName := parts . ContainerName
// Check if we have container level SAS or account level SAS
if containerName != "" {
// Container level SAS
if f . rootContainer != "" && containerName != f . rootContainer {
return nil , fmt . Errorf ( "container name in SAS URL (%q) and container provided in command (%q) do not match" , containerName , f . rootContainer )
}
// Rewrite the endpoint string to be without the container
parts . ContainerName = ""
endpoint = parts . String ( )
}
f . svc , err = service . NewClientWithNoCredential ( endpoint , & clientOpt )
if err != nil {
return nil , fmt . Errorf ( "unable to create SAS URL client: %w" , err )
}
// if using Container level SAS put the container client into the cache
if containerName != "" {
_ = f . cntSVC ( containerName )
f . isLimited = true
}
2022-11-30 12:45:34 +01:00
case opt . ClientID != "" && opt . Tenant != "" && opt . ClientSecret != "" :
// Service principal with client secret
options := azidentity . ClientSecretCredentialOptions {
ClientOptions : policyClientOptions ,
}
cred , err = azidentity . NewClientSecretCredential ( opt . Tenant , opt . ClientID , opt . ClientSecret , & options )
if err != nil {
return nil , fmt . Errorf ( "error creating a client secret credential: %w" , err )
}
case opt . ClientID != "" && opt . Tenant != "" && opt . ClientCertificatePath != "" :
// Service principal with certificate
//
// Read the certificate
data , err := os . ReadFile ( env . ShellExpand ( opt . ClientCertificatePath ) )
if err != nil {
return nil , fmt . Errorf ( "error reading client certificate file: %w" , err )
}
// NewClientCertificateCredential requires at least one *x509.Certificate, and a
// crypto.PrivateKey.
//
// ParseCertificates returns these given certificate data in PEM or PKCS12 format.
// It handles common scenarios but has limitations, for example it doesn't load PEM
// encrypted private keys.
var password [ ] byte
if opt . ClientCertificatePassword != "" {
pw , err := obscure . Reveal ( opt . Password )
if err != nil {
return nil , fmt . Errorf ( "certificate password decode failed - did you obscure it?: %w" , err )
}
password = [ ] byte ( pw )
}
certs , key , err := azidentity . ParseCertificates ( data , password )
if err != nil {
return nil , fmt . Errorf ( "failed to parse client certificate file: %w" , err )
}
options := azidentity . ClientCertificateCredentialOptions {
ClientOptions : policyClientOptions ,
SendCertificateChain : opt . ClientSendCertificateChain ,
}
cred , err = azidentity . NewClientCertificateCredential (
opt . Tenant , opt . ClientID , certs , key , & options ,
)
if err != nil {
return nil , fmt . Errorf ( "create azure service principal with client certificate credential failed: %w" , err )
}
case opt . ClientID != "" && opt . Tenant != "" && opt . Username != "" && opt . Password != "" :
// User with username and password
options := azidentity . UsernamePasswordCredentialOptions {
ClientOptions : policyClientOptions ,
}
password , err := obscure . Reveal ( opt . Password )
if err != nil {
return nil , fmt . Errorf ( "user password decode failed - did you obscure it?: %w" , err )
}
cred , err = azidentity . NewUsernamePasswordCredential (
opt . Tenant , opt . ClientID , opt . Username , password , & options ,
)
if err != nil {
return nil , fmt . Errorf ( "authenticate user with password failed: %w" , err )
}
2022-11-29 16:43:22 +01:00
case opt . ServicePrincipalFile != "" :
2022-11-30 12:45:34 +01:00
// Loading service principal credentials from file.
loadedCreds , err := os . ReadFile ( env . ShellExpand ( opt . ServicePrincipalFile ) )
2022-11-29 16:43:22 +01:00
if err != nil {
return nil , fmt . Errorf ( "error opening service principal credentials file: %w" , err )
}
parsedCreds , err := parseServicePrincipalCredentials ( ctx , loadedCreds )
if err != nil {
return nil , fmt . Errorf ( "error parsing service principal credentials file: %w" , err )
}
options := azidentity . ClientSecretCredentialOptions {
ClientOptions : policyClientOptions ,
}
cred , err = azidentity . NewClientSecretCredential ( parsedCreds . Tenant , parsedCreds . AppID , parsedCreds . Password , & options )
2020-12-06 06:13:51 +01:00
if err != nil {
2022-11-29 16:43:22 +01:00
return nil , fmt . Errorf ( "error creating a client secret credential: %w" , err )
2020-12-06 06:13:51 +01:00
}
2022-11-30 12:45:34 +01:00
case opt . UseMSI :
// Specifying a user-assigned identity. Exactly one of the above IDs must be specified.
// Validate and ensure exactly one is set. (To do: better validation.)
var b2i = map [ bool ] int { false : 0 , true : 1 }
set := b2i [ opt . MSIClientID != "" ] + b2i [ opt . MSIObjectID != "" ] + b2i [ opt . MSIResourceID != "" ]
if set > 1 {
return nil , errors . New ( "more than one user-assigned identity ID is set" )
}
var options azidentity . ManagedIdentityCredentialOptions
switch {
case opt . MSIClientID != "" :
options . ID = azidentity . ClientID ( opt . MSIClientID )
case opt . MSIObjectID != "" :
// FIXME this doesn't appear to be in the new SDK?
return nil , fmt . Errorf ( "MSI object ID is currently unsupported" )
case opt . MSIResourceID != "" :
options . ID = azidentity . ResourceID ( opt . MSIResourceID )
}
cred , err = azidentity . NewManagedIdentityCredential ( & options )
if err != nil {
return nil , fmt . Errorf ( "failed to acquire MSI token: %w" , err )
}
2022-11-29 16:43:22 +01:00
default :
return nil , errors . New ( "no authentication method configured" )
}
// Make the client if not already created
if f . svc == nil {
// Work out what the endpoint is if it is still unset
if opt . Endpoint == "" {
if opt . Account == "" {
return nil , fmt . Errorf ( "account must be set: can't make service URL" )
}
u , err := url . Parse ( fmt . Sprintf ( "https://%s.%s" , opt . Account , storageDefaultBaseURL ) )
if err != nil {
2022-11-30 12:45:34 +01:00
return nil , fmt . Errorf ( "failed to make azure storage URL from account: %w" , err )
2022-11-29 16:43:22 +01:00
}
opt . Endpoint = u . String ( )
}
if sharedKeyCred != nil {
// Shared key cred
f . svc , err = service . NewClientWithSharedKeyCredential ( opt . Endpoint , sharedKeyCred , & clientOpt )
if err != nil {
return nil , fmt . Errorf ( "create client with shared key failed: %w" , err )
}
} else if cred != nil {
// Azidentity cred
f . svc , err = service . NewClient ( opt . Endpoint , cred , & clientOpt )
if err != nil {
return nil , fmt . Errorf ( "create client failed: %w" , err )
}
}
}
if f . svc == nil {
return nil , fmt . Errorf ( "internal error: auth failed to make credentials or client" )
2017-07-25 16:18:13 +02:00
}
2019-08-16 11:10:56 +02:00
if f . rootContainer != "" && f . rootDirectory != "" {
2017-07-25 16:18:13 +02:00
// Check to see if the (container,directory) is actually an existing file
oldRoot := f . root
2019-08-16 11:10:56 +02:00
newRoot , leaf := path . Split ( oldRoot )
f . setRoot ( newRoot )
_ , err := f . NewObject ( ctx , leaf )
2017-07-25 16:18:13 +02:00
if err != nil {
2019-01-04 13:32:28 +01:00
if err == fs . ErrorObjectNotFound || err == fs . ErrorNotAFile {
// File doesn't exist or is a directory so return old f
2019-08-16 11:10:56 +02:00
f . setRoot ( oldRoot )
2017-07-25 16:18:13 +02:00
return f , nil
}
return nil , err
}
// return an error with an fs which points to the parent
return f , fs . ErrorIsFile
}
return f , nil
}
2022-11-23 17:46:21 +01:00
// return the container client for the container passed in
func ( f * Fs ) cntSVC ( containerName string ) ( containerClient * container . Client ) {
f . cntSVCcacheMu . Lock ( )
defer f . cntSVCcacheMu . Unlock ( )
2019-08-16 11:10:56 +02:00
var ok bool
2022-11-23 17:46:21 +01:00
if containerClient , ok = f . cntSVCcache [ containerName ] ; ! ok {
containerClient = f . svc . NewContainerClient ( containerName )
f . cntSVCcache [ containerName ] = containerClient
2019-08-16 11:10:56 +02:00
}
2022-11-23 17:46:21 +01:00
return containerClient
2019-08-16 11:10:56 +02:00
}
2017-07-25 16:18:13 +02:00
// Return an Object from a path
//
// If it can't be found it returns the error fs.ErrorObjectNotFound.
2023-04-28 18:24:19 +02:00
func ( f * Fs ) newObjectWithInfo ( ctx context . Context , remote string , info * container . BlobItem ) ( fs . Object , error ) {
2017-07-25 16:18:13 +02:00
o := & Object {
fs : f ,
remote : remote ,
}
if info != nil {
2018-07-13 17:21:49 +02:00
err := o . decodeMetaDataFromBlob ( info )
2017-07-25 16:18:13 +02:00
if err != nil {
return nil , err
}
2021-09-06 11:41:54 +02:00
} else if ! o . fs . opt . NoHeadObject {
2023-04-28 18:24:19 +02:00
err := o . readMetaData ( ctx ) // reads info and headers, returning an error
2017-07-25 16:18:13 +02:00
if err != nil {
return nil , err
}
}
return o , nil
}
// NewObject finds the Object at remote. If it can't be found
// it returns the error fs.ErrorObjectNotFound.
2019-06-17 10:34:30 +02:00
func ( f * Fs ) NewObject ( ctx context . Context , remote string ) ( fs . Object , error ) {
2023-04-28 18:24:19 +02:00
return f . newObjectWithInfo ( ctx , remote , nil )
2017-07-25 16:18:13 +02:00
}
2022-11-23 17:46:21 +01:00
// getBlobSVC creates a blob client
func ( f * Fs ) getBlobSVC ( container , containerPath string ) * blob . Client {
2023-01-12 13:36:56 +01:00
return f . cntSVC ( container ) . NewBlobClient ( containerPath )
2022-11-23 17:46:21 +01:00
}
// getBlockBlobSVC creates a block blob client
func ( f * Fs ) getBlockBlobSVC ( container , containerPath string ) * blockblob . Client {
2023-01-12 13:36:56 +01:00
return f . cntSVC ( container ) . NewBlockBlobClient ( containerPath )
2017-07-25 16:18:13 +02:00
}
2018-07-13 17:21:49 +02:00
// updateMetadataWithModTime adds the modTime passed in to o.meta.
func ( o * Object ) updateMetadataWithModTime ( modTime time . Time ) {
2023-09-13 16:36:44 +02:00
metadataMu . Lock ( )
defer metadataMu . Unlock ( )
2017-07-25 16:18:13 +02:00
// Make sure o.meta is not nil
if o . meta == nil {
o . meta = make ( map [ string ] string , 1 )
}
// Set modTimeKey in it
o . meta [ modTimeKey ] = modTime . Format ( timeFormatOut )
}
2019-01-04 13:32:28 +01:00
// Returns whether file is a directory marker or not
2023-01-12 13:21:21 +01:00
func isDirectoryMarker ( size int64 , metadata map [ string ] * string , remote string ) bool {
2019-01-04 13:32:28 +01:00
// Directory markers are 0 length
if size == 0 {
endsWithSlash := strings . HasSuffix ( remote , "/" )
2022-12-13 11:45:40 +01:00
if endsWithSlash || remote == "" {
2019-01-04 13:32:28 +01:00
return true
}
2022-12-13 11:45:40 +01:00
// Note that metadata with hdi_isfolder = true seems to be a
// defacto standard for marking blobs as directories.
// Note also that the metadata hasn't been normalised to lower case yet
for k , v := range metadata {
2023-04-28 18:24:19 +02:00
if v != nil && strings . EqualFold ( k , dirMetaKey ) && * v == dirMetaValue {
2022-12-13 11:45:40 +01:00
return true
}
}
2022-11-23 17:46:21 +01:00
}
return false
}
2017-07-25 16:18:13 +02:00
// listFn is called from list to handle an object or directory.
//
// remote is the decoded remote path, object is the listing entry for a
// file (nil for directories discovered via BlobPrefixes) and
// isDirectory says which kind of entry this is.
type listFn func(remote string, object *container.BlobItem, isDirectory bool) error
2017-07-25 16:18:13 +02:00
// list lists the objects into the function supplied from
// the container and root supplied
//
// dir is the starting directory, "" for root
//
// The remote has prefix removed from it and if addContainer is set then
// it adds the container to the start.
//
// If recurse is false a delimiter of "/" is used so only one level is
// returned; maxResults bounds the page size of each listing request.
func (f *Fs) list(ctx context.Context, containerName, directory, prefix string, addContainer bool, recurse bool, maxResults int32, fn listFn) error {
	// A container we recently deleted can't be listed
	if f.cache.IsDeleted(containerName) {
		return fs.ErrorDirNotFound
	}
	// Both prefix and directory are turned into directory-style
	// prefixes ending in "/"
	if prefix != "" {
		prefix += "/"
	}
	if directory != "" {
		directory += "/"
	}
	delimiter := ""
	if !recurse {
		delimiter = "/"
	}

	pager := f.cntSVC(containerName).NewListBlobsHierarchyPager(delimiter, &container.ListBlobsHierarchyOptions{
		// Copy, Metadata, Snapshots, UncommittedBlobs, Deleted, Tags, Versions, LegalHold, ImmutabilityPolicy, DeletedWithVersions bool
		Include: container.ListBlobsInclude{
			Copy:             false,
			Metadata:         true, // needed to spot directory markers
			Snapshots:        false,
			UncommittedBlobs: false,
			Deleted:          false,
		},
		Prefix:     &directory,
		MaxResults: &maxResults,
	})
	// Count of files + directory markers seen, used for the directory
	// existence check at the end when directory markers are in use
	foundItems := 0
	for pager.More() {
		var response container.ListBlobsHierarchyResponse
		// Fetch the next page inside the pacer so errors are retried
		err := f.pacer.Call(func() (bool, error) {
			var err error
			response, err = pager.NextPage(ctx)
			//response, err = f.srv.ListBlobsHierarchySegment(ctx, marker, delimiter, options)
			return f.shouldRetry(ctx, err)
		})
		if err != nil {
			// Check http error code along with service code, current SDK doesn't populate service code correctly sometimes
			if storageErr, ok := err.(*azcore.ResponseError); ok && (storageErr.ErrorCode == string(bloberror.ContainerNotFound) || storageErr.StatusCode == http.StatusNotFound) {
				return fs.ErrorDirNotFound
			}
			return err
		}
		// Advance marker to next
		// marker = response.NextMarker
		foundItems += len(response.Segment.BlobItems)
		for i := range response.Segment.BlobItems {
			file := response.Segment.BlobItems[i]
			// Finish if file name no longer has prefix
			// if prefix != "" && !strings.HasPrefix(file.Name, prefix) {
			// 	return nil
			// }
			if file.Name == nil {
				fs.Debugf(f, "Nil name received")
				continue
			}
			// Decode the blob name back to the standard rclone path
			remote := f.opt.Enc.ToStandardPath(*file.Name)
			if !strings.HasPrefix(remote, prefix) {
				fs.Debugf(f, "Odd name received %q", remote)
				continue
			}
			isDirectory := isDirectoryMarker(*file.Properties.ContentLength, file.Metadata, remote)
			if isDirectory {
				// Don't insert the root directory
				if remote == directory {
					continue
				}
				// process directory markers as directories
				remote = strings.TrimRight(remote, "/")
			}
			remote = remote[len(prefix):]
			if addContainer {
				remote = path.Join(containerName, remote)
			}
			// Send object
			err = fn(remote, file, isDirectory)
			if err != nil {
				return err
			}
		}
		// Send the subdirectories (only present when a delimiter is used)
		foundItems += len(response.Segment.BlobPrefixes)
		for _, remote := range response.Segment.BlobPrefixes {
			if remote.Name == nil {
				fs.Debugf(f, "Nil prefix received")
				continue
			}
			remote := strings.TrimRight(*remote.Name, "/")
			remote = f.opt.Enc.ToStandardPath(remote)
			if !strings.HasPrefix(remote, prefix) {
				fs.Debugf(f, "Odd directory name received %q", remote)
				continue
			}
			remote = remote[len(prefix):]
			if addContainer {
				remote = path.Join(containerName, remote)
			}
			// Send object
			err = fn(remote, nil, true)
			if err != nil {
				return err
			}
		}
	}
	if f.opt.DirectoryMarkers && foundItems == 0 && directory != "" {
		// Determine whether the directory exists or not by whether it has a marker
		_, err := f.readMetaData(ctx, containerName, directory)
		if err != nil {
			if err == fs.ErrorObjectNotFound {
				return fs.ErrorDirNotFound
			}
			return err
		}
	}
	return nil
}
// Convert a list item into a DirEntry
2023-04-28 18:24:19 +02:00
func ( f * Fs ) itemToDirEntry ( ctx context . Context , remote string , object * container . BlobItem , isDirectory bool ) ( fs . DirEntry , error ) {
2017-07-25 16:18:13 +02:00
if isDirectory {
d := fs . NewDir ( remote , time . Time { } )
return d , nil
}
2023-04-28 18:24:19 +02:00
o , err := f . newObjectWithInfo ( ctx , remote , object )
2017-07-25 16:18:13 +02:00
if err != nil {
return nil , err
}
return o , nil
}
2020-12-10 22:08:58 +01:00
// Check to see if this is a limited container and the container is not found
func ( f * Fs ) containerOK ( container string ) bool {
if ! f . isLimited {
return true
}
2022-11-23 17:46:21 +01:00
f . cntSVCcacheMu . Lock ( )
defer f . cntSVCcacheMu . Unlock ( )
for limitedContainer := range f . cntSVCcache {
2020-12-10 22:08:58 +01:00
if container == limitedContainer {
return true
}
}
return false
}
2017-07-25 16:18:13 +02:00
// listDir lists a single directory
2022-11-23 17:46:21 +01:00
func ( f * Fs ) listDir ( ctx context . Context , containerName , directory , prefix string , addContainer bool ) ( entries fs . DirEntries , err error ) {
if ! f . containerOK ( containerName ) {
2020-12-10 22:08:58 +01:00
return nil , fs . ErrorDirNotFound
}
2022-11-23 17:46:21 +01:00
err = f . list ( ctx , containerName , directory , prefix , addContainer , false , int32 ( f . opt . ListChunkSize ) , func ( remote string , object * container . BlobItem , isDirectory bool ) error {
2023-04-28 18:24:19 +02:00
entry , err := f . itemToDirEntry ( ctx , remote , object , isDirectory )
2017-07-25 16:18:13 +02:00
if err != nil {
return err
}
if entry != nil {
entries = append ( entries , entry )
}
return nil
} )
if err != nil {
return nil , err
}
2018-03-01 13:11:34 +01:00
// container must be present if listing succeeded
2022-11-23 17:46:21 +01:00
f . cache . MarkOK ( containerName )
2017-07-25 16:18:13 +02:00
return entries , nil
}
// listContainers returns all the containers to out
2019-08-22 22:30:55 +02:00
func ( f * Fs ) listContainers ( ctx context . Context ) ( entries fs . DirEntries , err error ) {
2019-08-16 11:10:56 +02:00
if f . isLimited {
2022-11-23 17:46:21 +01:00
f . cntSVCcacheMu . Lock ( )
for container := range f . cntSVCcache {
2019-08-16 11:10:56 +02:00
d := fs . NewDir ( container , time . Time { } )
entries = append ( entries , d )
}
2022-11-23 17:46:21 +01:00
f . cntSVCcacheMu . Unlock ( )
2019-08-16 11:10:56 +02:00
return entries , nil
}
2022-11-23 17:46:21 +01:00
err = f . listContainersToFn ( func ( Name string , LastModified time . Time ) error {
d := fs . NewDir ( f . opt . Enc . ToStandardName ( Name ) , LastModified )
f . cache . MarkOK ( Name )
2017-07-25 16:18:13 +02:00
entries = append ( entries , d )
return nil
} )
if err != nil {
return nil , err
}
return entries , nil
}
// List the objects and directories in dir into entries. The
// entries can be returned in any order but should be for a
// complete directory.
//
// dir should be "" to list the root, and should not have
// trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
2019-06-17 10:34:30 +02:00
func ( f * Fs ) List ( ctx context . Context , dir string ) ( entries fs . DirEntries , err error ) {
2019-08-16 11:10:56 +02:00
container , directory := f . split ( dir )
if container == "" {
2019-08-22 22:30:55 +02:00
if directory != "" {
return nil , fs . ErrorListBucketRequired
}
return f . listContainers ( ctx )
2017-07-25 16:18:13 +02:00
}
2019-08-16 11:10:56 +02:00
return f . listDir ( ctx , container , directory , f . rootDirectory , f . rootContainer == "" )
2017-07-25 16:18:13 +02:00
}
// ListR lists the objects and directories of the Fs starting
// from dir recursively into out.
//
// dir should be "" to start from the root, and should not
// have trailing slashes.
//
// This should return ErrDirNotFound if the directory isn't
// found.
//
// It should call callback for each tranche of entries read.
// These need not be returned in any particular order. If
// callback returns an error then the listing will stop
// immediately.
//
// Don't implement this unless you have a more efficient way
// of listing recursively that doing a directory traversal.
func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
	containerName, directory := f.split(dir)
	// Helper which batches entries before sending them to callback
	list := walk.NewListRHelper(callback)
	// listR recursively lists one container, feeding entries to list
	listR := func(containerName, directory, prefix string, addContainer bool) error {
		return f.list(ctx, containerName, directory, prefix, addContainer, true, int32(f.opt.ListChunkSize), func(remote string, object *container.BlobItem, isDirectory bool) error {
			entry, err := f.itemToDirEntry(ctx, remote, object, isDirectory)
			if err != nil {
				return err
			}
			return list.Add(entry)
		})
	}
	if containerName == "" {
		// Listing from the root: enumerate every container then
		// recurse into each one, prefixing entries with the container
		entries, err := f.listContainers(ctx)
		if err != nil {
			return err
		}
		for _, entry := range entries {
			err = list.Add(entry)
			if err != nil {
				return err
			}
			container := entry.Remote()
			err = listR(container, "", f.rootDirectory, true)
			if err != nil {
				return err
			}
			// container must be present if listing succeeded
			f.cache.MarkOK(container)
		}
	} else {
		if !f.containerOK(containerName) {
			return fs.ErrorDirNotFound
		}
		err = listR(containerName, directory, f.rootDirectory, f.rootContainer == "")
		if err != nil {
			return err
		}
		// container must be present if listing succeeded
		f.cache.MarkOK(containerName)
	}
	// Send any remaining buffered entries
	return list.Flush()
}
// listContainerFn is called from listContainersToFn to handle a
// container, passing its name and last modified time.
type listContainerFn func(Name string, LastModified time.Time) error
2017-07-25 16:18:13 +02:00
// listContainersToFn lists the containers to the function supplied
func ( f * Fs ) listContainersToFn ( fn listContainerFn ) error {
2022-11-23 17:46:21 +01:00
max := int32 ( f . opt . ListChunkSize )
pager := f . svc . NewListContainersPager ( & service . ListContainersOptions {
Include : service . ListContainersInclude { Metadata : true , Deleted : true } ,
MaxResults : & max ,
} )
2018-07-13 17:21:49 +02:00
ctx := context . Background ( )
2022-11-23 17:46:21 +01:00
for pager . More ( ) {
var response service . ListContainersResponse
2018-07-13 17:21:49 +02:00
err := f . pacer . Call ( func ( ) ( bool , error ) {
var err error
2022-11-23 17:46:21 +01:00
response , err = pager . NextPage ( ctx )
2021-03-11 15:44:01 +01:00
return f . shouldRetry ( ctx , err )
2018-07-13 17:21:49 +02:00
} )
2017-07-25 16:18:13 +02:00
if err != nil {
return err
}
2018-07-13 17:21:49 +02:00
2022-11-23 17:46:21 +01:00
for _ , cnt := range response . ContainerItems {
if cnt == nil || cnt . Name == nil || cnt . Properties == nil || cnt . Properties . LastModified == nil {
fs . Debugf ( f , "nil returned in container info" )
}
err = fn ( * cnt . Name , * cnt . Properties . LastModified )
2018-07-13 17:21:49 +02:00
if err != nil {
return err
}
}
2017-07-25 16:18:13 +02:00
}
2018-07-13 17:21:49 +02:00
2017-07-25 16:18:13 +02:00
return nil
}
// Put the object into the container
//
2022-08-05 17:35:41 +02:00
// Copy the reader in to the new object which is returned.
2017-07-25 16:18:13 +02:00
//
// The new object may have been created if an error is returned
2019-06-17 10:34:30 +02:00
func ( f * Fs ) Put ( ctx context . Context , in io . Reader , src fs . ObjectInfo , options ... fs . OpenOption ) ( fs . Object , error ) {
2017-07-25 16:18:13 +02:00
// Temporary Object under construction
fs := & Object {
fs : f ,
remote : src . Remote ( ) ,
}
2019-06-17 10:34:30 +02:00
return fs , fs . Update ( ctx , in , src , options ... )
2017-07-25 16:18:13 +02:00
}
2020-05-13 22:29:21 +02:00
// PutStream uploads to the remote path with the modTime given of indeterminate size.
//
// Delegates to Put which handles streaming uploads.
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	return f.Put(ctx, in, src, options...)
}
2023-04-28 18:24:19 +02:00
// Create directory marker file and parents
//
// Does nothing unless the directory_markers option is in use. Walks up
// from dir creating a zero length marker blob (with hdi_isfolder
// metadata) for each missing directory level, stopping at the first
// level which already exists or at the container root.
func (f *Fs) createDirectoryMarker(ctx context.Context, container, dir string) error {
	if !f.opt.DirectoryMarkers || container == "" {
		return nil
	}
	// Object to be uploaded - reused for each level of the walk
	o := &Object{
		fs:      f,
		modTime: time.Now(),
		meta: map[string]string{
			dirMetaKey: dirMetaValue, // marks the blob as a directory
		},
	}
	for {
		_, containerPath := f.split(dir)
		// Don't create the directory marker if it is the bucket or at the very root
		if containerPath == "" {
			break
		}
		// Markers are stored with a trailing "/"
		o.remote = dir + "/"
		// Check to see if object already exists
		_, err := f.readMetaData(ctx, container, containerPath+"/")
		if err == nil {
			// Marker (and so all its parents) already exists
			return nil
		}
		// Upload it if not
		fs.Debugf(o, "Creating directory marker")
		content := io.Reader(strings.NewReader(""))
		err = o.Update(ctx, content, o)
		if err != nil {
			return fmt.Errorf("creating directory marker failed: %w", err)
		}
		// Now check parent directory exists
		dir = path.Dir(dir)
		if dir == "/" || dir == "." {
			break
		}
	}
	return nil
}
2017-07-25 16:18:13 +02:00
// Mkdir creates the container if it doesn't exist
2019-06-17 10:34:30 +02:00
func ( f * Fs ) Mkdir ( ctx context . Context , dir string ) error {
2019-08-16 11:10:56 +02:00
container , _ := f . split ( dir )
2023-04-28 18:24:19 +02:00
e := f . makeContainer ( ctx , container )
if e != nil {
return e
}
return f . createDirectoryMarker ( ctx , container , dir )
}
// mkdirParent creates the parent bucket/directory if it doesn't exist
func ( f * Fs ) mkdirParent ( ctx context . Context , remote string ) error {
remote = strings . TrimRight ( remote , "/" )
dir := path . Dir ( remote )
if dir == "/" || dir == "." {
dir = ""
}
return f . Mkdir ( ctx , dir )
2019-08-22 22:30:55 +02:00
}
// makeContainer creates the container if it doesn't exist
//
// The creation is skipped entirely when no_check_container is set, when
// the container is already known to exist (via f.cache) or when this is
// a container level SAS URL (which implies the container exists).
func (f *Fs) makeContainer(ctx context.Context, container string) error {
	if f.opt.NoCheckContainer {
		return nil
	}
	// f.cache.Create only runs the callback if the container isn't
	// already marked as existing
	return f.cache.Create(container, func() error {
		// If this is a SAS URL limited to a container then assume it is already created
		if f.isLimited {
			return nil
		}
		opt := service.CreateContainerOptions{
			// Optional. Specifies a user-defined name-value pair associated with the blob.
			//Metadata map[string]string
			// Optional. Specifies the encryption scope settings to set on the container.
			//CpkScopeInfo *CpkScopeInfo
		}
		if f.publicAccess != "" {
			// Specifies whether data in the container may be accessed publicly and the level of access
			opt.Access = &f.publicAccess
		}
		// now try to create the container
		return f.pacer.Call(func() (bool, error) {
			_, err := f.svc.CreateContainer(ctx, container, &opt)
			if err != nil {
				if storageErr, ok := err.(*azcore.ResponseError); ok {
					switch bloberror.Code(storageErr.ErrorCode) {
					case bloberror.ContainerAlreadyExists:
						// Success - someone else (or we) created it already
						return false, nil
					case bloberror.ContainerBeingDeleted:
						// From https://docs.microsoft.com/en-us/rest/api/storageservices/delete-container
						// When a container is deleted, a container with the same name cannot be created
						// for at least 30 seconds; the container may not be available for more than 30
						// seconds if the service is still processing the request.
						time.Sleep(6 * time.Second) // default 10 retries will be 60 seconds
						f.cache.MarkDeleted(container)
						return true, err
					case bloberror.AuthorizationFailure:
						// Assume that the user does not have permission to
						// create the container and carry on anyway.
						fs.Debugf(f, "Tried to create container but got %s error - carrying on assuming container exists. Use no_check_container to stop this check..", storageErr.ErrorCode)
						return false, nil
					}
				}
			}
			return f.shouldRetry(ctx, err)
		})
	}, nil)
}
2019-08-16 11:10:56 +02:00
// isEmpty checks to see if a given (container, directory) is empty and returns an error if not
2022-11-23 17:46:21 +01:00
func ( f * Fs ) isEmpty ( ctx context . Context , containerName , directory string ) ( err error ) {
2017-07-25 16:18:13 +02:00
empty := true
2022-11-23 17:46:21 +01:00
err = f . list ( ctx , containerName , directory , f . rootDirectory , f . rootContainer == "" , true , 1 , func ( remote string , object * container . BlobItem , isDirectory bool ) error {
2017-07-25 16:18:13 +02:00
empty = false
return nil
} )
if err != nil {
return err
}
if ! empty {
return fs . ErrorDirectoryNotEmpty
}
return nil
}
// deleteContainer deletes the container. It can delete a full
// container so use isEmpty if you don't want that.
2022-11-23 17:46:21 +01:00
func ( f * Fs ) deleteContainer ( ctx context . Context , containerName string ) error {
return f . cache . Remove ( containerName , func ( ) error {
getOptions := container . GetPropertiesOptions { }
delOptions := container . DeleteOptions { }
2019-08-16 11:10:56 +02:00
return f . pacer . Call ( func ( ) ( bool , error ) {
2022-11-23 17:46:21 +01:00
_ , err := f . cntSVC ( containerName ) . GetProperties ( ctx , & getOptions )
2019-08-16 11:10:56 +02:00
if err == nil {
2022-11-23 17:46:21 +01:00
_ , err = f . cntSVC ( containerName ) . Delete ( ctx , & delOptions )
2019-08-16 11:10:56 +02:00
}
2018-07-13 17:21:49 +02:00
2019-08-16 11:10:56 +02:00
if err != nil {
// Check http error code along with service code, current SDK doesn't populate service code correctly sometimes
2022-11-23 17:46:21 +01:00
if storageErr , ok := err . ( * azcore . ResponseError ) ; ok && ( storageErr . ErrorCode == string ( bloberror . ContainerNotFound ) || storageErr . StatusCode == http . StatusNotFound ) {
2019-08-16 11:10:56 +02:00
return false , fs . ErrorDirNotFound
}
2021-03-11 15:44:01 +01:00
return f . shouldRetry ( ctx , err )
2018-07-13 17:21:49 +02:00
}
2021-03-11 15:44:01 +01:00
return f . shouldRetry ( ctx , err )
2019-08-16 11:10:56 +02:00
} )
2017-07-25 16:18:13 +02:00
} )
}
// Rmdir deletes the container if the fs is at the root
//
// Returns an error if it isn't empty
2019-06-17 10:34:30 +02:00
func ( f * Fs ) Rmdir ( ctx context . Context , dir string ) error {
2019-08-16 11:10:56 +02:00
container , directory := f . split ( dir )
2023-04-28 18:24:19 +02:00
// Remove directory marker file
2023-09-05 18:07:44 +02:00
if f . opt . DirectoryMarkers && container != "" && directory != "" {
2023-04-28 18:24:19 +02:00
o := & Object {
fs : f ,
remote : dir + "/" ,
}
fs . Debugf ( o , "Removing directory marker" )
err := o . Remove ( ctx )
if err != nil {
return fmt . Errorf ( "removing directory marker failed: %w" , err )
}
}
2019-08-16 11:10:56 +02:00
if container == "" || directory != "" {
return nil
}
err := f . isEmpty ( ctx , container , directory )
2017-07-25 16:18:13 +02:00
if err != nil {
return err
}
2019-08-16 11:10:56 +02:00
return f . deleteContainer ( ctx , container )
2017-07-25 16:18:13 +02:00
}
// Precision of the remote
//
// Modification times are stored in blob metadata (see setMetadata),
// which preserves nanosecond resolution.
func (f *Fs) Precision() time.Duration {
	return time.Nanosecond
}
// Hashes returns the supported hash sets.
//
// Azure blob storage exposes Content-MD5, so MD5 is the only hash supported.
func (f *Fs) Hashes() hash.Set {
	return hash.Set(hash.MD5)
}
// Purge deletes all the files and directories including the old versions.
2020-06-04 23:25:14 +02:00
func ( f * Fs ) Purge ( ctx context . Context , dir string ) error {
2019-08-16 11:10:56 +02:00
container , directory := f . split ( dir )
2023-09-05 18:07:44 +02:00
if container == "" {
return errors . New ( "can't purge from root" )
}
if directory != "" {
2019-08-16 11:10:56 +02:00
// Delegate to caller if not root of a container
2017-07-25 16:18:13 +02:00
return fs . ErrorCantPurge
}
2019-08-16 11:10:56 +02:00
return f . deleteContainer ( ctx , container )
2017-07-25 16:18:13 +02:00
}
2020-10-13 23:43:40 +02:00
// Copy src to this remote using server-side copy operations.
2017-07-25 16:18:13 +02:00
//
2022-08-05 17:35:41 +02:00
// This is stored with the remote path given.
2017-07-25 16:18:13 +02:00
//
2022-08-05 17:35:41 +02:00
// It returns the destination Object and a possible error.
2017-07-25 16:18:13 +02:00
//
// Will only be called if src.Fs().Name() == f.Name()
//
// If it isn't possible then return fs.ErrorCantCopy
2019-06-17 10:34:30 +02:00
func ( f * Fs ) Copy ( ctx context . Context , src fs . Object , remote string ) ( fs . Object , error ) {
2019-08-16 11:10:56 +02:00
dstContainer , dstPath := f . split ( remote )
2023-04-28 18:24:19 +02:00
err := f . mkdirParent ( ctx , remote )
2017-07-25 16:18:13 +02:00
if err != nil {
return nil , err
}
srcObj , ok := src . ( * Object )
if ! ok {
fs . Debugf ( src , "Can't copy - not same remote type" )
return nil , fs . ErrorCantCopy
}
2022-11-23 17:46:21 +01:00
dstBlobSVC := f . getBlobSVC ( dstContainer , dstPath )
srcBlobSVC := srcObj . getBlobSVC ( )
srcURL := srcBlobSVC . URL ( )
2018-07-13 17:21:49 +02:00
2022-11-23 17:46:21 +01:00
options := blob . StartCopyFromURLOptions {
2023-04-04 17:22:02 +02:00
Tier : parseTier ( f . opt . AccessTier ) ,
2018-07-13 17:21:49 +02:00
}
2022-11-23 17:46:21 +01:00
var startCopy blob . StartCopyFromURLResponse
2017-07-25 16:18:13 +02:00
err = f . pacer . Call ( func ( ) ( bool , error ) {
2022-11-23 17:46:21 +01:00
startCopy , err = dstBlobSVC . StartCopyFromURL ( ctx , srcURL , & options )
2021-03-11 15:44:01 +01:00
return f . shouldRetry ( ctx , err )
2017-07-25 16:18:13 +02:00
} )
if err != nil {
return nil , err
}
2018-07-13 17:21:49 +02:00
2022-11-23 17:46:21 +01:00
copyStatus := startCopy . CopyStatus
getOptions := blob . GetPropertiesOptions { }
for copyStatus != nil && string ( * copyStatus ) == string ( container . CopyStatusTypePending ) {
2018-07-13 17:21:49 +02:00
time . Sleep ( 1 * time . Second )
2022-11-23 17:46:21 +01:00
getMetadata , err := dstBlobSVC . GetProperties ( ctx , & getOptions )
2018-07-13 17:21:49 +02:00
if err != nil {
return nil , err
}
2022-11-23 17:46:21 +01:00
copyStatus = getMetadata . CopyStatus
2018-07-13 17:21:49 +02:00
}
2019-06-17 10:34:30 +02:00
return f . NewObject ( ctx , remote )
2017-07-25 16:18:13 +02:00
}
// ------------------------------------------------------------
// Fs returns the parent Fs
func (o *Object) Fs() fs.Info {
	return o.fs
}
// Return a string version
//
// Returns "<nil>" for a nil receiver so it is safe to use in logging.
func (o *Object) String() string {
	if o == nil {
		return "<nil>"
	}
	return o.remote
}
// Remote returns the remote path
func (o *Object) Remote() string {
	return o.remote
}
// Hash returns the MD5 of an object returning a lowercase hex string
2019-06-17 10:34:30 +02:00
func ( o * Object ) Hash ( ctx context . Context , t hash . Type ) ( string , error ) {
2018-01-18 21:27:52 +01:00
if t != hash . MD5 {
return "" , hash . ErrUnsupported
2017-07-25 16:18:13 +02:00
}
// Convert base64 encoded md5 into lower case hex
if o . md5 == "" {
return "" , nil
}
data , err := base64 . StdEncoding . DecodeString ( o . md5 )
if err != nil {
2022-06-08 22:54:39 +02:00
return "" , fmt . Errorf ( "failed to decode Content-MD5: %q: %w" , o . md5 , err )
2017-07-25 16:18:13 +02:00
}
return hex . EncodeToString ( data ) , nil
}
// Size returns the size of an object in bytes
func (o *Object) Size() int64 {
	return o.size
}
2023-01-12 13:21:21 +01:00
// Set o.metadata from metadata
func ( o * Object ) setMetadata ( metadata map [ string ] * string ) {
2023-09-13 16:36:44 +02:00
metadataMu . Lock ( )
defer metadataMu . Unlock ( )
2018-07-13 17:21:49 +02:00
if len ( metadata ) > 0 {
2022-11-23 17:46:21 +01:00
// Lower case the metadata
o . meta = make ( map [ string ] string , len ( metadata ) )
for k , v := range metadata {
2023-01-12 13:21:21 +01:00
if v != nil {
o . meta [ strings . ToLower ( k ) ] = * v
}
2022-11-23 17:46:21 +01:00
}
2022-12-15 16:10:53 +01:00
// Set o.modTime from metadata if it exists and
// UseServerModTime isn't in use.
if modTime , ok := o . meta [ modTimeKey ] ; ! o . fs . ci . UseServerModTime && ok {
2017-07-25 16:18:13 +02:00
when , err := time . Parse ( timeFormatIn , modTime )
if err != nil {
fs . Debugf ( o , "Couldn't parse %v = %q: %v" , modTimeKey , modTime , err )
}
o . modTime = when
}
} else {
o . meta = nil
}
2018-07-13 17:21:49 +02:00
}
2023-01-12 13:21:21 +01:00
// Get metadata from o.meta
func ( o * Object ) getMetadata ( ) ( metadata map [ string ] * string ) {
2023-09-13 16:36:44 +02:00
metadataMu . Lock ( )
defer metadataMu . Unlock ( )
2023-01-12 13:21:21 +01:00
if len ( o . meta ) == 0 {
return nil
}
metadata = make ( map [ string ] * string , len ( o . meta ) )
for k , v := range o . meta {
2023-04-28 17:41:18 +02:00
v := v
2023-01-12 13:21:21 +01:00
metadata [ k ] = & v
2022-11-23 17:46:21 +01:00
}
2023-01-12 13:21:21 +01:00
return metadata
2022-11-23 17:46:21 +01:00
}
2018-07-13 17:21:49 +02:00
// decodeMetaDataFromPropertiesResponse sets the metadata from the data passed in
//
// Sets
2022-08-05 17:35:41 +02:00
//
// o.id
// o.modTime
// o.size
// o.md5
// o.meta
2022-11-23 17:46:21 +01:00
func ( o * Object ) decodeMetaDataFromPropertiesResponse ( info * blob . GetPropertiesResponse ) ( err error ) {
metadata := info . Metadata
var size int64
if info . ContentLength == nil {
size = - 1
} else {
size = * info . ContentLength
}
2019-01-04 13:32:28 +01:00
if isDirectoryMarker ( size , metadata , o . remote ) {
return fs . ErrorNotAFile
}
2018-09-07 06:43:40 +02:00
// NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain
// this as base64 encoded string.
2022-11-23 17:46:21 +01:00
o . md5 = base64 . StdEncoding . EncodeToString ( info . ContentMD5 )
if info . ContentType == nil {
o . mimeType = ""
} else {
o . mimeType = * info . ContentType
}
2019-01-04 13:32:28 +01:00
o . size = size
2022-11-23 17:46:21 +01:00
if info . LastModified == nil {
o . modTime = time . Now ( )
} else {
o . modTime = * info . LastModified
}
if info . AccessTier == nil {
o . accessTier = blob . AccessTier ( "" )
} else {
o . accessTier = blob . AccessTier ( * info . AccessTier )
}
2019-01-04 13:32:28 +01:00
o . setMetadata ( metadata )
2018-07-13 17:21:49 +02:00
return nil
}
2022-11-23 17:46:21 +01:00
func ( o * Object ) decodeMetaDataFromDownloadResponse ( info * blob . DownloadStreamResponse ) ( err error ) {
metadata := info . Metadata
var size int64
if info . ContentLength == nil {
size = - 1
} else {
size = * info . ContentLength
}
2021-09-06 11:41:54 +02:00
if isDirectoryMarker ( size , metadata , o . remote ) {
return fs . ErrorNotAFile
}
// NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain
// this as base64 encoded string.
2022-11-23 17:46:21 +01:00
o . md5 = base64 . StdEncoding . EncodeToString ( info . ContentMD5 )
if info . ContentType == nil {
o . mimeType = ""
} else {
o . mimeType = * info . ContentType
}
2021-09-06 11:41:54 +02:00
o . size = size
2022-11-23 17:46:21 +01:00
if info . LastModified == nil {
o . modTime = time . Now ( )
} else {
o . modTime = * info . LastModified
}
// FIXME response doesn't appear to have AccessTier in?
// if info.AccessTier == nil {
// o.accessTier = blob.AccessTier("")
// } else {
// o.accessTier = blob.AccessTier(*info.AccessTier)
// }
2021-09-06 11:41:54 +02:00
o . setMetadata ( metadata )
2021-10-19 21:10:18 +02:00
// If it was a Range request, the size is wrong, so correct it
2022-11-23 17:46:21 +01:00
if info . ContentRange != nil {
contentRange := * info . ContentRange
2021-10-19 21:10:18 +02:00
slash := strings . IndexRune ( contentRange , '/' )
if slash >= 0 {
i , err := strconv . ParseInt ( contentRange [ slash + 1 : ] , 10 , 64 )
if err == nil {
o . size = i
} else {
fs . Debugf ( o , "Failed to find parse integer from in %q: %v" , contentRange , err )
}
} else {
fs . Debugf ( o , "Failed to find length in %q" , contentRange )
}
}
2021-09-06 11:41:54 +02:00
return nil
}
2022-11-23 17:46:21 +01:00
func ( o * Object ) decodeMetaDataFromBlob ( info * container . BlobItem ) ( err error ) {
if info . Properties == nil {
return errors . New ( "nil Properties in decodeMetaDataFromBlob" )
}
2019-01-04 13:32:28 +01:00
metadata := info . Metadata
2022-11-23 17:46:21 +01:00
var size int64
if info . Properties . ContentLength == nil {
size = - 1
} else {
size = * info . Properties . ContentLength
}
2023-04-28 18:24:19 +02:00
if isDirectoryMarker ( size , metadata , o . remote ) {
2019-01-04 13:32:28 +01:00
return fs . ErrorNotAFile
}
2018-09-07 06:43:40 +02:00
// NOTE - Client library always returns MD5 as base64 decoded string, Object needs to maintain
// this as base64 encoded string.
o . md5 = base64 . StdEncoding . EncodeToString ( info . Properties . ContentMD5 )
2022-11-23 17:46:21 +01:00
if info . Properties . ContentType == nil {
o . mimeType = ""
} else {
o . mimeType = * info . Properties . ContentType
}
2019-01-04 13:32:28 +01:00
o . size = size
2022-11-23 17:46:21 +01:00
if info . Properties . LastModified == nil {
o . modTime = time . Now ( )
} else {
o . modTime = * info . Properties . LastModified
}
if info . Properties . AccessTier == nil {
o . accessTier = blob . AccessTier ( "" )
} else {
o . accessTier = * info . Properties . AccessTier
}
2023-01-12 13:21:21 +01:00
o . setMetadata ( metadata )
2022-11-23 17:46:21 +01:00
2017-07-25 16:18:13 +02:00
return nil
}
2022-11-23 17:46:21 +01:00
// getBlobSVC creates a blob client
//
// Delegates to the Fs-level factory using this object's container and path.
func (o *Object) getBlobSVC() *blob.Client {
	container, directory := o.split()
	return o.fs.getBlobSVC(container, directory)
}
2017-07-25 16:18:13 +02:00
// clearMetaData clears enough metadata so readMetaData will re-read it
//
// readMetaData uses a zero modTime as the "not yet fetched" sentinel.
func (o *Object) clearMetaData() {
	o.modTime = time.Time{}
}
2023-04-28 18:24:19 +02:00
// readMetaData gets the metadata if it hasn't already been fetched
//
// Returns fs.ErrorObjectNotFound when the container is known not to
// exist or the blob is missing.
func (f *Fs) readMetaData(ctx context.Context, container, containerPath string) (blobProperties blob.GetPropertiesResponse, err error) {
	if !f.containerOK(container) {
		return blobProperties, fs.ErrorObjectNotFound
	}
	blb := f.getBlobSVC(container, containerPath)

	// Read metadata (this includes metadata)
	options := blob.GetPropertiesOptions{}
	err = f.pacer.Call(func() (bool, error) {
		blobProperties, err = blb.GetProperties(ctx, &options)
		return f.shouldRetry(ctx, err)
	})
	if err != nil {
		// On directories - GetProperties does not work and current SDK does not populate service code correctly hence check regular http response as well
		// Use errors.As rather than a bare type assertion so wrapped errors are recognised too
		var storageErr *azcore.ResponseError
		if errors.As(err, &storageErr) && (storageErr.ErrorCode == string(bloberror.BlobNotFound) || storageErr.StatusCode == http.StatusNotFound) {
			return blobProperties, fs.ErrorObjectNotFound
		}
		return blobProperties, err
	}
	return blobProperties, nil
}
2017-07-25 16:18:13 +02:00
// readMetaData gets the metadata if it hasn't already been fetched
//
// Sets
2022-08-05 17:35:41 +02:00
//
// o.id
// o.modTime
// o.size
// o.md5
2023-04-28 18:24:19 +02:00
func ( o * Object ) readMetaData ( ctx context . Context ) ( err error ) {
2017-07-25 16:18:13 +02:00
if ! o . modTime . IsZero ( ) {
return nil
}
2023-04-28 18:24:19 +02:00
container , containerPath := o . split ( )
blobProperties , err := o . fs . readMetaData ( ctx , container , containerPath )
2017-07-25 16:18:13 +02:00
if err != nil {
return err
}
2022-11-23 17:46:21 +01:00
return o . decodeMetaDataFromPropertiesResponse ( & blobProperties )
2017-07-25 16:18:13 +02:00
}
// ModTime returns the modification time of the object
//
// It attempts to read the objects mtime and if that isn't present the
// LastModified returned in the http headers
func (o *Object) ModTime(ctx context.Context) (result time.Time) {
	// The error is logged in readMetaData
	_ = o.readMetaData(ctx)
	return o.modTime
}
// SetModTime sets the modification time of the local fs object
2019-06-17 10:34:30 +02:00
func ( o * Object ) SetModTime ( ctx context . Context , modTime time . Time ) error {
2023-09-13 16:36:44 +02:00
o . updateMetadataWithModTime ( modTime )
2018-07-13 17:21:49 +02:00
2022-11-23 17:46:21 +01:00
blb := o . getBlobSVC ( )
opt := blob . SetMetadataOptions { }
2017-07-25 16:18:13 +02:00
err := o . fs . pacer . Call ( func ( ) ( bool , error ) {
2023-01-12 13:21:21 +01:00
_ , err := blb . SetMetadata ( ctx , o . getMetadata ( ) , & opt )
2021-03-11 15:44:01 +01:00
return o . fs . shouldRetry ( ctx , err )
2017-07-25 16:18:13 +02:00
} )
if err != nil {
return err
}
o . modTime = modTime
return nil
}
// Storable returns if this object is storable
func (o *Object) Storable() bool {
	return true
}
// Open an object for read
2019-06-17 10:34:30 +02:00
func ( o * Object ) Open ( ctx context . Context , options ... fs . OpenOption ) ( in io . ReadCloser , err error ) {
2018-07-13 17:21:49 +02:00
// Offset and Count for range download
var offset int64
var count int64
2022-11-23 17:46:21 +01:00
if o . AccessTier ( ) == blob . AccessTierArchive {
2023-10-18 09:01:11 +02:00
return nil , fmt . Errorf ( "blob in archive tier, you need to set tier to hot, cool, cold first" )
2018-08-19 18:53:59 +02:00
}
2019-08-06 16:18:08 +02:00
fs . FixRangeOption ( options , o . size )
2017-07-25 16:18:13 +02:00
for _ , option := range options {
switch x := option . ( type ) {
case * fs . RangeOption :
2018-07-13 17:21:49 +02:00
offset , count = x . Decode ( o . size )
if count < 0 {
count = o . size - offset
2017-07-25 16:18:13 +02:00
}
case * fs . SeekOption :
2018-07-13 17:21:49 +02:00
offset = x . Offset
2017-07-25 16:18:13 +02:00
default :
if option . Mandatory ( ) {
fs . Logf ( o , "Unsupported mandatory option: %v" , option )
}
}
}
2022-11-23 17:46:21 +01:00
blb := o . getBlobSVC ( )
opt := blob . DownloadStreamOptions {
// When set to true and specified together with the Range, the service returns the MD5 hash for the range, as long as the
// range is less than or equal to 4 MB in size.
//RangeGetContentMD5 *bool
// Range specifies a range of bytes. The default value is all bytes.
//Range HTTPRange
Range : blob . HTTPRange {
Offset : offset ,
Count : count ,
} ,
// AccessConditions *AccessConditions
// CpkInfo *CpkInfo
// CpkScopeInfo *CpkScopeInfo
}
var downloadResponse blob . DownloadStreamResponse
2017-07-25 16:18:13 +02:00
err = o . fs . pacer . Call ( func ( ) ( bool , error ) {
2022-11-23 17:46:21 +01:00
downloadResponse , err = blb . DownloadStream ( ctx , & opt )
2021-03-11 15:44:01 +01:00
return o . fs . shouldRetry ( ctx , err )
2017-07-25 16:18:13 +02:00
} )
if err != nil {
2021-11-04 11:12:57 +01:00
return nil , fmt . Errorf ( "failed to open for download: %w" , err )
2017-07-25 16:18:13 +02:00
}
2022-11-23 17:46:21 +01:00
err = o . decodeMetaDataFromDownloadResponse ( & downloadResponse )
2021-09-06 11:41:54 +02:00
if err != nil {
2021-11-04 11:12:57 +01:00
return nil , fmt . Errorf ( "failed to decode metadata for download: %w" , err )
2021-09-06 11:41:54 +02:00
}
2022-11-23 17:46:21 +01:00
return downloadResponse . Body , nil
2017-07-25 16:18:13 +02:00
}
2022-11-23 17:46:21 +01:00
// Converts a string into a pointer to a string
func pString(s string) *string {
	p := new(string)
	*p = s
	return p
}
// readSeekCloser joins an io.Reader and an io.Seeker and provides a no-op io.Closer
//
// Used to adapt seekable buffers into the ReadSeekCloser the SDK
// upload calls expect.
type readSeekCloser struct {
	io.Reader
	io.Seeker
}

// Close does nothing
func (rs *readSeekCloser) Close() error {
	return nil
}
2023-08-16 17:59:39 +02:00
// increment the array as LSB binary
//
// Treats xs as a little-endian 64-bit counter and adds one, carrying
// into the next byte on overflow.
func increment(xs *[8]byte) {
	for i := range xs {
		xs[i]++
		// A non-zero result means no overflow, so the carry stops here
		if xs[i] != 0 {
			break
		}
	}
}
2023-08-16 17:59:39 +02:00
// record chunk number and id for Close
//
// Collected while chunks are written concurrently, then sorted by
// chunkNumber before committing the block list.
type azBlock struct {
	chunkNumber int
	id          string
}
2022-11-23 17:46:21 +01:00
2023-08-16 17:59:39 +02:00
// Implements the fs.ChunkWriter interface
type azChunkWriter struct {
	chunkSize     int64      // size of each chunk except possibly the last
	size          int64      // total size of the upload, -1 if unknown
	f             *Fs        // parent Fs
	ui            uploadInfo // blob client and HTTP headers for the upload
	blocksMu      sync.Mutex // protects the below
	blocks        []azBlock  // list of blocks for finalize
	binaryBlockID [8]byte    // block counter as LSB first 8 bytes
	o             *Object    // object being uploaded
}
// OpenChunkWriter returns the chunk size and a ChunkWriter
//
// Pass in the remote and the src object
// You can also use options to hint at the desired chunk size
func (f *Fs) OpenChunkWriter(ctx context.Context, remote string, src fs.ObjectInfo, options ...fs.OpenOption) (info fs.ChunkWriterInfo, writer fs.ChunkWriter, err error) {
	// Temporary Object under construction
	o := &Object{
		fs:     f,
		remote: remote,
	}
	ui, err := o.prepareUpload(ctx, src, options)
	if err != nil {
		return info, nil, fmt.Errorf("failed to prepare upload: %w", err)
	}

	// Calculate correct partSize
	partSize := f.opt.ChunkSize
	totalParts := -1
	size := src.Size()

	// Note that the max size of file is 4.75 TB (100 MB X 50,000
	// blocks) and this is bigger than the max uncommitted block
	// size (9.52 TB) so we do not need to part commit block lists
	// or garbage collect uncommitted blocks.
	//
	// See: https://docs.microsoft.com/en-gb/rest/api/storageservices/put-block

	// size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
	// buffers here (default 4MB). With a maximum number of parts (50,000) this will be a file of
	// 195GB which seems like a not too unreasonable limit.
	if size == -1 {
		// Unknown size: keep the configured chunk size and warn (once)
		// about the resulting maximum file size.
		warnStreamUpload.Do(func() {
			fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v",
				f.opt.ChunkSize, partSize*fs.SizeSuffix(blockblob.MaxBlocks))
		})
	} else {
		// Known size: grow the chunk size if needed so the upload fits
		// in blockblob.MaxBlocks chunks.
		partSize = chunksize.Calculator(remote, size, blockblob.MaxBlocks, f.opt.ChunkSize)
		if partSize > fs.SizeSuffix(blockblob.MaxStageBlockBytes) {
			return info, nil, fmt.Errorf("can't upload as it is too big %v - takes more than %d chunks of %v", fs.SizeSuffix(size), fs.SizeSuffix(blockblob.MaxBlocks), fs.SizeSuffix(blockblob.MaxStageBlockBytes))
		}
		totalParts = int(fs.SizeSuffix(size) / partSize)
		if fs.SizeSuffix(size)%partSize != 0 {
			totalParts++
		}
	}
	fs.Debugf(o, "Multipart upload session started for %d parts of size %v", totalParts, partSize)

	chunkWriter := &azChunkWriter{
		chunkSize: int64(partSize),
		size:      size,
		f:         f,
		ui:        ui,
		o:         o,
	}
	info = fs.ChunkWriterInfo{
		ChunkSize:   int64(partSize),
		Concurrency: o.fs.opt.UploadConcurrency,
		//LeavePartsOnError: o.fs.opt.LeavePartsOnError,
	}
	fs.Debugf(o, "open chunk writer: started multipart upload")
	return info, chunkWriter, nil
}
2022-11-23 17:46:21 +01:00
2023-08-16 17:59:39 +02:00
// WriteChunk will write chunk number with reader bytes, where chunk number >= 0
//
// Safe to call concurrently for different chunks: the shared block
// list is guarded by blocksMu.  Returns the number of bytes written.
func (w *azChunkWriter) WriteChunk(ctx context.Context, chunkNumber int, reader io.ReadSeeker) (int64, error) {
	if chunkNumber < 0 {
		err := fmt.Errorf("invalid chunk number provided: %v", chunkNumber)
		return -1, err
	}

	// Upload the block, with MD5 for check
	m := md5.New()
	currentChunkSize, err := io.Copy(m, reader)
	if err != nil {
		return -1, err
	}
	// If no data read, don't write the chunk
	if currentChunkSize == 0 {
		return 0, nil
	}
	md5sum := m.Sum(nil)
	transactionalMD5 := md5sum[:]

	// increment the blockID and save the blocks for finalize
	increment(&w.binaryBlockID)
	blockID := base64.StdEncoding.EncodeToString(w.binaryBlockID[:])

	// Save the blockID for the commit
	w.blocksMu.Lock()
	w.blocks = append(w.blocks, azBlock{
		chunkNumber: chunkNumber,
		id:          blockID,
	})
	w.blocksMu.Unlock()

	err = w.f.pacer.Call(func() (bool, error) {
		// rewind the reader on retry and after reading md5
		_, err = reader.Seek(0, io.SeekStart)
		if err != nil {
			return false, err
		}
		options := blockblob.StageBlockOptions{
			// Specify the transactional md5 for the body, to be validated by the service.
			TransactionalValidation: blob.TransferValidationTypeMD5(transactionalMD5),
		}
		_, err = w.ui.blb.StageBlock(ctx, blockID, &readSeekCloser{Reader: reader, Seeker: reader}, &options)
		if err != nil {
			// NOTE(review): early chunks use the normal retry policy;
			// later chunks are always retried - presumably to avoid
			// abandoning a mostly-complete upload. Confirm intent.
			if chunkNumber <= 8 {
				return w.f.shouldRetry(ctx, err)
			}
			// retry all chunks once have done the first few
			return true, err
		}
		return false, nil
	})
	if err != nil {
		return -1, fmt.Errorf("failed to upload chunk %d with %v bytes: %w", chunkNumber+1, currentChunkSize, err)
	}

	fs.Debugf(w.o, "multipart upload wrote chunk %d with %v bytes", chunkNumber+1, currentChunkSize)
	return currentChunkSize, err
}
2023-09-23 13:20:01 +02:00
// Abort the multipart upload.
//
// FIXME it would be nice to delete uncommitted blocks.
//
// See: https://github.com/rclone/rclone/issues/5583
//
// However there doesn't seem to be an easy way of doing this other than
// by deleting the target.
//
// This means that a failed upload deletes the target which isn't ideal.
//
// Uploading a zero length blob and deleting it will remove the
// uncommitted blocks I think.
//
// Could check to see if a file exists already and if it doesn't then
// create a 0 length file and delete it to flush the uncommitted
// blocks.
//
// This is what azcopy does
// https://github.com/MicrosoftDocs/azure-docs/issues/36347#issuecomment-541457962
func (w *azChunkWriter) Abort(ctx context.Context) error {
	// Intentionally a no-op - uncommitted blocks expire server-side.
	fs.Debugf(w.o, "multipart upload aborted (did nothing - see issue #5583)")
	return nil
}
// Close and finalise the multipart upload
func ( w * azChunkWriter ) Close ( ctx context . Context ) ( err error ) {
// sort the completed parts by part number
sort . Slice ( w . blocks , func ( i , j int ) bool {
return w . blocks [ i ] . chunkNumber < w . blocks [ j ] . chunkNumber
} )
// Create a list of block IDs
blockIDs := make ( [ ] string , len ( w . blocks ) )
for i := range w . blocks {
blockIDs [ i ] = w . blocks [ i ] . id
2022-11-23 17:46:21 +01:00
}
options := blockblob . CommitBlockListOptions {
2023-08-16 17:59:39 +02:00
Metadata : w . o . getMetadata ( ) ,
Tier : parseTier ( w . f . opt . AccessTier ) ,
HTTPHeaders : & w . ui . httpHeaders ,
2022-11-23 17:46:21 +01:00
}
// Finalise the upload session
2023-08-16 17:59:39 +02:00
err = w . f . pacer . Call ( func ( ) ( bool , error ) {
_ , err := w . ui . blb . CommitBlockList ( ctx , blockIDs , & options )
return w . f . shouldRetry ( ctx , err )
2022-11-23 17:46:21 +01:00
} )
if err != nil {
2023-08-16 17:59:39 +02:00
return fmt . Errorf ( "failed to complete multipart upload: %w" , err )
2022-11-23 17:46:21 +01:00
}
2023-08-16 17:59:39 +02:00
fs . Debugf ( w . o , "multipart upload finished" )
return err
}
// warnStreamUpload ensures the streaming-upload size-limit warning is
// only logged once.
var warnStreamUpload sync.Once

// uploadMultipart uploads a file using multipart upload
//
// Write a larger blob, using CreateBlockBlob, PutBlock, and PutBlockList.
func (o *Object) uploadMultipart(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (ui uploadInfo, err error) {
	// Delegate chunking and concurrency to the generic multipart
	// uploader, which calls back into o.fs.OpenChunkWriter.
	chunkWriter, err := multipart.UploadMultipart(ctx, src, in, multipart.UploadMultipartOptions{
		Open:        o.fs,
		OpenOptions: options,
	})
	if err != nil {
		return ui, err
	}
	// The writer is always an *azChunkWriter here - recover the upload
	// info so the caller can see the headers and dir-marker flag used.
	return chunkWriter.(*azChunkWriter).ui, nil
}
// uploadSinglepart uploads a short blob using a single part upload
2023-08-16 17:59:39 +02:00
func ( o * Object ) uploadSinglepart ( ctx context . Context , in io . Reader , size int64 , ui uploadInfo ) ( err error ) {
chunkSize := int64 ( o . fs . opt . ChunkSize )
2022-11-23 17:46:21 +01:00
// fs.Debugf(o, "Single part upload starting of object %d bytes", size)
2023-08-16 17:59:39 +02:00
if size > chunkSize || size < 0 {
return fmt . Errorf ( "internal error: single part upload size too big %d > %d" , size , chunkSize )
2022-11-23 17:46:21 +01:00
}
2023-08-16 17:59:39 +02:00
rw := multipart . NewRW ( )
defer fs . CheckClose ( rw , & err )
2022-11-23 17:46:21 +01:00
2023-08-16 17:59:39 +02:00
n , err := io . CopyN ( rw , in , size + 1 )
2022-11-23 17:46:21 +01:00
if err != nil && err != io . EOF {
return fmt . Errorf ( "single part upload read failed: %w" , err )
}
2023-08-16 17:59:39 +02:00
if n != size {
2022-11-23 17:46:21 +01:00
return fmt . Errorf ( "single part upload: expecting to read %d bytes but read %d" , size , n )
}
2023-08-16 17:59:39 +02:00
rs := & readSeekCloser { Reader : rw , Seeker : rw }
2022-11-23 17:46:21 +01:00
options := blockblob . UploadOptions {
2023-01-12 13:21:21 +01:00
Metadata : o . getMetadata ( ) ,
2023-04-04 17:22:02 +02:00
Tier : parseTier ( o . fs . opt . AccessTier ) ,
2023-08-16 17:59:39 +02:00
HTTPHeaders : & ui . httpHeaders ,
2022-11-23 17:46:21 +01:00
}
2023-08-16 17:59:39 +02:00
return o . fs . pacer . Call ( func ( ) ( bool , error ) {
// rewind the reader on retry
_ , err = rs . Seek ( 0 , io . SeekStart )
if err != nil {
return false , err
}
_ , err = ui . blb . Upload ( ctx , rs , & options )
2022-11-23 17:46:21 +01:00
return o . fs . shouldRetry ( ctx , err )
} )
}
2023-08-16 17:59:39 +02:00
// Info needed for an upload
type uploadInfo struct {
	blb         *blockblob.Client // block blob client for the destination
	httpHeaders blob.HTTPHeaders  // headers (content type, MD5 etc.) to set
	isDirMarker bool              // true if uploading a directory marker object
}
// Prepare the object for upload
func ( o * Object ) prepareUpload ( ctx context . Context , src fs . ObjectInfo , options [ ] fs . OpenOption ) ( ui uploadInfo , err error ) {
2021-11-27 17:18:18 +01:00
container , containerPath := o . split ( )
if container == "" || containerPath == "" {
2023-08-16 17:59:39 +02:00
return ui , fmt . Errorf ( "can't upload to root - need a container" )
2021-11-27 17:18:18 +01:00
}
2023-04-28 18:24:19 +02:00
// Create parent dir/bucket if not saving directory marker
2023-09-13 16:36:44 +02:00
metadataMu . Lock ( )
2023-09-03 19:09:31 +02:00
_ , ui . isDirMarker = o . meta [ dirMetaKey ]
2023-09-13 16:36:44 +02:00
metadataMu . Unlock ( )
2023-09-03 19:09:31 +02:00
if ! ui . isDirMarker {
2023-04-28 18:24:19 +02:00
err = o . fs . mkdirParent ( ctx , o . remote )
if err != nil {
2023-08-16 17:59:39 +02:00
return ui , err
2023-04-28 18:24:19 +02:00
}
2017-07-25 16:18:13 +02:00
}
2019-09-10 17:35:25 +02:00
2018-07-13 17:21:49 +02:00
// Update Mod time
2019-06-17 10:34:30 +02:00
o . updateMetadataWithModTime ( src . ModTime ( ctx ) )
2018-07-13 17:21:49 +02:00
if err != nil {
2023-08-16 17:59:39 +02:00
return ui , err
2018-07-13 17:21:49 +02:00
}
2022-11-23 17:46:21 +01:00
// Create the HTTP headers for the upload
2023-08-16 17:59:39 +02:00
ui . httpHeaders = blob . HTTPHeaders {
2022-11-23 17:46:21 +01:00
BlobContentType : pString ( fs . MimeType ( ctx , src ) ) ,
}
2019-09-10 17:35:25 +02:00
// Compute the Content-MD5 of the file. As we stream all uploads it
2019-02-28 22:31:07 +01:00
// will be set in PutBlockList API call using the 'x-ms-blob-content-md5' header
2020-04-23 20:47:14 +02:00
if ! o . fs . opt . DisableCheckSum {
if sourceMD5 , _ := src . Hash ( ctx , hash . MD5 ) ; sourceMD5 != "" {
sourceMD5bytes , err := hex . DecodeString ( sourceMD5 )
if err == nil {
2023-08-16 17:59:39 +02:00
ui . httpHeaders . BlobContentMD5 = sourceMD5bytes
2020-04-23 20:47:14 +02:00
} else {
fs . Debugf ( o , "Failed to decode %q as MD5: %v" , sourceMD5 , err )
}
2017-07-25 16:18:13 +02:00
}
}
2022-11-14 10:51:49 +01:00
// Apply upload options (also allows one to overwrite content-type)
for _ , option := range options {
key , value := option . Header ( )
lowerKey := strings . ToLower ( key )
switch lowerKey {
case "" :
// ignore
case "cache-control" :
2023-08-16 17:59:39 +02:00
ui . httpHeaders . BlobCacheControl = pString ( value )
2022-11-14 10:51:49 +01:00
case "content-disposition" :
2023-08-16 17:59:39 +02:00
ui . httpHeaders . BlobContentDisposition = pString ( value )
2022-11-14 10:51:49 +01:00
case "content-encoding" :
2023-08-16 17:59:39 +02:00
ui . httpHeaders . BlobContentEncoding = pString ( value )
2022-11-14 10:51:49 +01:00
case "content-language" :
2023-08-16 17:59:39 +02:00
ui . httpHeaders . BlobContentLanguage = pString ( value )
2022-11-14 10:51:49 +01:00
case "content-type" :
2023-08-16 17:59:39 +02:00
ui . httpHeaders . BlobContentType = pString ( value )
}
}
ui . blb = o . fs . getBlockBlobSVC ( container , containerPath )
return ui , nil
}
// Update the object with the contents of the io.Reader, modTime and size
//
// The new object may have been created if an error is returned
func ( o * Object ) Update ( ctx context . Context , in io . Reader , src fs . ObjectInfo , options ... fs . OpenOption ) ( err error ) {
if o . accessTier == blob . AccessTierArchive {
if o . fs . opt . ArchiveTierDelete {
fs . Debugf ( o , "deleting archive tier blob before updating" )
err = o . Remove ( ctx )
if err != nil {
return fmt . Errorf ( "failed to delete archive blob before updating: %w" , err )
}
} else {
return errCantUpdateArchiveTierBlobs
2022-11-14 10:51:49 +01:00
}
}
2022-11-23 17:46:21 +01:00
size := src . Size ( )
2023-08-16 17:59:39 +02:00
multipartUpload := size < 0 || size > int64 ( o . fs . opt . ChunkSize )
var ui uploadInfo
2022-04-26 18:37:40 +02:00
2022-11-23 17:46:21 +01:00
if multipartUpload {
2023-08-16 17:59:39 +02:00
ui , err = o . uploadMultipart ( ctx , in , src , options ... )
2022-11-23 17:46:21 +01:00
} else {
2023-08-16 17:59:39 +02:00
ui , err = o . prepareUpload ( ctx , src , options )
if err != nil {
return fmt . Errorf ( "failed to prepare upload: %w" , err )
}
err = o . uploadSinglepart ( ctx , in , size , ui )
2018-07-13 17:21:49 +02:00
}
2017-07-25 16:18:13 +02:00
if err != nil {
return err
}
2022-11-23 17:46:21 +01:00
2018-08-19 18:53:59 +02:00
// Refresh metadata on object
2023-08-16 17:59:39 +02:00
if ! ui . isDirMarker {
2023-04-28 18:24:19 +02:00
o . clearMetaData ( )
err = o . readMetaData ( ctx )
if err != nil {
return err
}
2018-08-19 18:53:59 +02:00
}
// If tier is not changed or not specified, do not attempt to invoke `SetBlobTier` operation
if o . fs . opt . AccessTier == string ( defaultAccessTier ) || o . fs . opt . AccessTier == string ( o . AccessTier ( ) ) {
return nil
}
// Now, set blob tier based on configured access tier
2018-09-11 03:55:06 +02:00
return o . SetTier ( o . fs . opt . AccessTier )
2017-07-25 16:18:13 +02:00
}
// Remove an object
2019-06-17 10:34:30 +02:00
func ( o * Object ) Remove ( ctx context . Context ) error {
2022-11-23 17:46:21 +01:00
blb := o . getBlobSVC ( )
//only := blob.DeleteSnapshotsOptionTypeOnly
opt := blob . DeleteOptions {
//DeleteSnapshots: &only,
}
2017-07-25 16:18:13 +02:00
return o . fs . pacer . Call ( func ( ) ( bool , error ) {
2022-11-23 17:46:21 +01:00
_ , err := blb . Delete ( ctx , & opt )
2021-03-11 15:44:01 +01:00
return o . fs . shouldRetry ( ctx , err )
2017-07-25 16:18:13 +02:00
} )
}
// MimeType of an Object if known, "" otherwise
2019-06-17 10:34:30 +02:00
func ( o * Object ) MimeType ( ctx context . Context ) string {
2017-07-25 16:18:13 +02:00
return o . mimeType
}
2018-08-19 18:53:59 +02:00
// AccessTier of an object, default is of type none
2022-11-23 17:46:21 +01:00
func ( o * Object ) AccessTier ( ) blob . AccessTier {
2018-08-19 18:53:59 +02:00
return o . accessTier
}
2018-09-11 03:55:06 +02:00
// SetTier performs changing object tier
func ( o * Object ) SetTier ( tier string ) error {
if ! validateAccessTier ( tier ) {
2022-06-08 22:54:39 +02:00
return fmt . Errorf ( "tier %s not supported by Azure Blob Storage" , tier )
2018-09-11 03:55:06 +02:00
}
// Check if current tier already matches with desired tier
if o . GetTier ( ) == tier {
return nil
}
2022-11-23 17:46:21 +01:00
desiredAccessTier := blob . AccessTier ( tier )
blb := o . getBlobSVC ( )
2018-09-11 03:55:06 +02:00
ctx := context . Background ( )
2022-11-23 17:46:21 +01:00
priority := blob . RehydratePriorityStandard
opt := blob . SetTierOptions {
RehydratePriority : & priority ,
}
2018-09-11 03:55:06 +02:00
err := o . fs . pacer . Call ( func ( ) ( bool , error ) {
2022-11-23 17:46:21 +01:00
_ , err := blb . SetTier ( ctx , desiredAccessTier , & opt )
2021-03-11 15:44:01 +01:00
return o . fs . shouldRetry ( ctx , err )
2018-09-11 03:55:06 +02:00
} )
if err != nil {
2022-06-08 22:54:39 +02:00
return fmt . Errorf ( "failed to set Blob Tier: %w" , err )
2018-09-11 03:55:06 +02:00
}
// Set access tier on local object also, this typically
// gets updated on get blob properties
o . accessTier = desiredAccessTier
fs . Debugf ( o , "Successfully changed object tier to %s" , tier )
return nil
}
// GetTier returns object tier in azure as string
func (o *Object) GetTier() string {
	tier := string(o.accessTier)
	return tier
}
2023-04-04 17:22:02 +02:00
// parseTier converts a tier name into a *blob.AccessTier, returning
// nil for the empty string.
func parseTier(tier string) *blob.AccessTier {
	if len(tier) == 0 {
		return nil
	}
	t := blob.AccessTier(tier)
	return &t
}
2017-07-25 16:18:13 +02:00
// Check the interfaces are satisfied
var (
2023-08-16 17:59:39 +02:00
_ fs . Fs = & Fs { }
_ fs . Copier = & Fs { }
_ fs . PutStreamer = & Fs { }
_ fs . Purger = & Fs { }
_ fs . ListRer = & Fs { }
_ fs . OpenChunkWriter = & Fs { }
_ fs . Object = & Object { }
_ fs . MimeTyper = & Object { }
_ fs . GetTierer = & Object { }
_ fs . SetTierer = & Object { }
2017-07-25 16:18:13 +02:00
)