[bugfix] determine mime-type to use during ffprobe evaluation stage, don't bother checking against file extension (#3506)

* determine mime-type to use during ffprobe evaluation stage, don't bother rechecking by file extension

* set mjpeg content-type

* fix up tests expecting differing default values
This commit is contained in:
kim 2024-11-04 13:58:15 +00:00 committed by GitHub
parent d2820a1470
commit 8f288f1689
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 85 additions and 1378 deletions

1
go.mod
View File

@ -17,7 +17,6 @@ require (
codeberg.org/gruf/go-kv v1.6.5
codeberg.org/gruf/go-list v0.0.0-20240425093752-494db03d641f
codeberg.org/gruf/go-mempool v0.0.0-20240507125005-cef10d64a760
codeberg.org/gruf/go-mimetypes v1.2.0
codeberg.org/gruf/go-mutexes v1.5.1
codeberg.org/gruf/go-runners v1.6.3
codeberg.org/gruf/go-sched v1.2.4

2
go.sum generated
View File

@ -62,8 +62,6 @@ codeberg.org/gruf/go-maps v1.0.4 h1:K+Ww4vvR3TZqm5jqrKVirmguZwa3v1VUvmig2SE8uxY=
codeberg.org/gruf/go-maps v1.0.4/go.mod h1:ASX7osM7kFwt5O8GfGflcFjrwYGD8eIuRLl/oMjhEi8=
codeberg.org/gruf/go-mempool v0.0.0-20240507125005-cef10d64a760 h1:m2/UCRXhjDwAg4vyji6iKCpomKw6P4PmBOUi5DvAMH4=
codeberg.org/gruf/go-mempool v0.0.0-20240507125005-cef10d64a760/go.mod h1:E3RcaCFNq4zXpvaJb8lfpPqdUAmSkP5F1VmMiEUYTEk=
codeberg.org/gruf/go-mimetypes v1.2.0 h1:3rZGXY/SkNYbamiddWXs2gETXIBkGIeWYnbWpp2OEbc=
codeberg.org/gruf/go-mimetypes v1.2.0/go.mod h1:YiUWRj/nAdJQc+UFRvcsL6xXZsbc6b6Ic739ycEO8Yg=
codeberg.org/gruf/go-mutexes v1.5.1 h1:xICU0WXhWr6wf+Iror4eE3xT+xnXNPrO6o77D/G6QuY=
codeberg.org/gruf/go-mutexes v1.5.1/go.mod h1:rPEqQ/y6CmGITaZ3GPTMQVsoZAOzbsAHyIaLsJcOqVE=
codeberg.org/gruf/go-runners v1.6.3 h1:To/AX7eTrWuXrTkA3RA01YTP5zha1VZ68LQ+0D4RY7E=

View File

@ -120,7 +120,7 @@ func (suite *InstancePatchTestSuite) TestInstancePatch1() {
"image/apng",
"audio/ogg",
"video/ogg",
"audio/x-m4a",
"audio/mp4",
"video/mp4",
"video/quicktime",
"audio/x-ms-wma",
@ -261,7 +261,7 @@ func (suite *InstancePatchTestSuite) TestInstancePatch2() {
"image/apng",
"audio/ogg",
"video/ogg",
"audio/x-m4a",
"audio/mp4",
"video/mp4",
"video/quicktime",
"audio/x-ms-wma",
@ -402,7 +402,7 @@ func (suite *InstancePatchTestSuite) TestInstancePatch3() {
"image/apng",
"audio/ogg",
"video/ogg",
"audio/x-m4a",
"audio/mp4",
"video/mp4",
"video/quicktime",
"audio/x-ms-wma",
@ -594,7 +594,7 @@ func (suite *InstancePatchTestSuite) TestInstancePatch6() {
"image/apng",
"audio/ogg",
"video/ogg",
"audio/x-m4a",
"audio/mp4",
"video/mp4",
"video/quicktime",
"audio/x-ms-wma",
@ -757,7 +757,7 @@ func (suite *InstancePatchTestSuite) TestInstancePatch8() {
"image/apng",
"audio/ogg",
"video/ogg",
"audio/x-m4a",
"audio/mp4",
"video/mp4",
"video/quicktime",
"audio/x-ms-wma",
@ -939,7 +939,7 @@ func (suite *InstancePatchTestSuite) TestInstancePatch9() {
"image/apng",
"audio/ogg",
"video/ogg",
"audio/x-m4a",
"audio/mp4",
"video/mp4",
"video/quicktime",
"audio/x-ms-wma",

View File

@ -323,14 +323,14 @@ type videoStream struct {
//
// Note the checks for (len(res.video) > 0) may catch some audio files with embedded
// album art as video, but i blame that on the hellscape that is media filetypes.
//
// TODO: we can update this code to also return a mimetype and avoid later parsing!
func (res *result) GetFileType() (gtsmodel.FileType, string) {
func (res *result) GetFileType() (gtsmodel.FileType, string, string) {
switch res.format {
case "mpeg":
return gtsmodel.FileTypeVideo, "mpeg"
return gtsmodel.FileTypeVideo,
"video/mpeg", "mpeg"
case "mjpeg":
return gtsmodel.FileTypeVideo, "mjpeg"
return gtsmodel.FileTypeVideo,
"video/x-motion-jpeg", "mjpeg"
case "mov,mp4,m4a,3gp,3g2,mj2":
switch {
case len(res.video) > 0:
@ -338,55 +338,70 @@ func (res *result) GetFileType() (gtsmodel.FileType, string) {
res.duration <= 30 {
// Short, soundless
// video file aka gifv.
return gtsmodel.FileTypeGifv, "mp4"
return gtsmodel.FileTypeGifv,
"video/mp4", "mp4"
} else {
// Video file (with or without audio).
return gtsmodel.FileTypeVideo, "mp4"
return gtsmodel.FileTypeVideo,
"video/mp4", "mp4"
}
case len(res.audio) > 0 &&
res.audio[0].codec == "aac":
// m4a only supports [aac] audio.
return gtsmodel.FileTypeAudio, "m4a"
return gtsmodel.FileTypeAudio,
"audio/mp4", "m4a"
}
case "apng":
return gtsmodel.FileTypeImage, "apng"
return gtsmodel.FileTypeImage,
"image/apng", "apng"
case "png_pipe":
return gtsmodel.FileTypeImage, "png"
return gtsmodel.FileTypeImage,
"image/png", "png"
case "image2", "image2pipe", "jpeg_pipe":
return gtsmodel.FileTypeImage, "jpeg"
return gtsmodel.FileTypeImage,
"image/jpeg", "jpeg"
case "webp", "webp_pipe":
return gtsmodel.FileTypeImage, "webp"
return gtsmodel.FileTypeImage,
"image/webp", "webp"
case "gif":
return gtsmodel.FileTypeImage, "gif"
return gtsmodel.FileTypeImage,
"image/gif", "gif"
case "mp3":
if len(res.audio) > 0 {
switch res.audio[0].codec {
case "mp2":
return gtsmodel.FileTypeAudio, "mp2"
return gtsmodel.FileTypeAudio,
"audio/mp2", "mp2"
case "mp3":
return gtsmodel.FileTypeAudio, "mp3"
return gtsmodel.FileTypeAudio,
"audio/mp3", "mp3"
}
}
case "asf":
switch {
case len(res.video) > 0:
return gtsmodel.FileTypeVideo, "wmv"
return gtsmodel.FileTypeVideo,
"video/x-ms-wmv", "wmv"
case len(res.audio) > 0:
return gtsmodel.FileTypeAudio, "wma"
return gtsmodel.FileTypeAudio,
"audio/x-ms-wma", "wma"
}
case "ogg":
if len(res.video) > 0 {
switch res.video[0].codec {
case "theora", "dirac": // daala, tarkin
return gtsmodel.FileTypeVideo, "ogv"
return gtsmodel.FileTypeVideo,
"video/ogg", "ogv"
}
}
if len(res.audio) > 0 {
switch res.audio[0].codec {
case "opus", "libopus":
return gtsmodel.FileTypeAudio, "opus"
return gtsmodel.FileTypeAudio,
"audio/opus", "opus"
default:
return gtsmodel.FileTypeAudio, "ogg"
return gtsmodel.FileTypeAudio,
"audio/ogg", "ogg"
}
}
case "matroska,webm":
@ -411,21 +426,27 @@ func (res *result) GetFileType() (gtsmodel.FileType, string) {
}
if isWebm {
// Check for valid webm codec config.
return gtsmodel.FileTypeVideo, "webm"
// Check valid webm codec config.
return gtsmodel.FileTypeVideo,
"video/webm", "webm"
}
// All else falls under generic mkv.
return gtsmodel.FileTypeVideo, "mkv"
return gtsmodel.FileTypeVideo,
"video/x-matroska", "mkv"
case len(res.audio) > 0:
return gtsmodel.FileTypeAudio, "mka"
return gtsmodel.FileTypeAudio,
"audio/x-matroska", "mka"
}
case "avi":
return gtsmodel.FileTypeVideo, "avi"
return gtsmodel.FileTypeVideo,
"video/x-msvideo", "avi"
case "flac":
return gtsmodel.FileTypeAudio, "flac"
return gtsmodel.FileTypeAudio,
"audio/flac", "flac"
}
return gtsmodel.FileTypeUnknown, res.format
return gtsmodel.FileTypeUnknown,
"", res.format
}
// ImageMeta extracts image metadata contained within ffprobe'd media result streams.

View File

@ -56,7 +56,7 @@
"video/ogg", // .ogv
// mpeg4 types
"audio/x-m4a", // .m4a
"audio/mp4", // .m4a
"video/mp4", // .mp4
"video/quicktime", // .mov

View File

@ -664,7 +664,7 @@ func (suite *ManagerTestSuite) TestOpusProcess() {
Duration: util.Ptr(float32(122.10006)),
Bitrate: util.Ptr(uint64(116426)),
}, attachment.FileMeta.Original)
suite.Equal("audio/ogg", attachment.File.ContentType)
suite.Equal("audio/opus", attachment.File.ContentType)
suite.Equal(1776956, attachment.File.FileSize)
suite.Empty(attachment.Blurhash)

View File

@ -163,9 +163,10 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
}
var ext string
var fileType gtsmodel.FileType
// Get type from ffprobe format data.
fileType, ext := result.GetFileType()
// Get abstract file type, mimetype and ext from ffprobe data.
fileType, p.emoji.ImageContentType, ext = result.GetFileType()
if fileType != gtsmodel.FileTypeImage {
return gtserror.Newf("unsupported emoji filetype: %s (%s)", fileType, ext)
}
@ -216,10 +217,6 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {
"png",
)
// Get mimetype for the file container
// type, falling back to generic data.
p.emoji.ImageContentType = getMimeType(ext)
// Set the known emoji static content type.
p.emoji.ImageStaticContentType = "image/png"

View File

@ -186,8 +186,8 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
p.media.FileMeta.Original.Duration = util.PtrIf(float32(result.duration))
p.media.FileMeta.Original.Bitrate = util.PtrIf(result.bitrate)
// Set media type from ffprobe format data.
p.media.Type, ext = result.GetFileType()
// Set generic media type and mimetype from ffprobe format data.
p.media.Type, p.media.File.ContentType, ext = result.GetFileType()
// Add file extension to path.
newpath := temppath + "." + ext
@ -236,10 +236,10 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
// Determine if blurhash needs generating.
needBlurhash := (p.media.Blurhash == "")
var newBlurhash string
var newBlurhash, mimeType string
// Generate thumbnail, and new blurhash if need from media.
thumbpath, newBlurhash, err = generateThumb(ctx, temppath,
// Generate thumbnail, and new blurhash if needed from temp media.
thumbpath, mimeType, newBlurhash, err = generateThumb(ctx, temppath,
thumbWidth,
thumbHeight,
result.orientation,
@ -250,6 +250,9 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
return gtserror.Newf("error generating image thumb: %w", err)
}
// Set generated thumbnail's mimetype.
p.media.Thumbnail.ContentType = mimeType
if needBlurhash {
// Set newly determined blurhash.
p.media.Blurhash = newBlurhash
@ -265,10 +268,6 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
ext,
)
// Get mimetype for the file container
// type, falling back to generic data.
p.media.File.ContentType = getMimeType(ext)
// Copy temporary file into storage at path.
filesz, err := p.mgr.state.Storage.PutFile(ctx,
p.media.File.Path,
@ -295,9 +294,6 @@ func (p *ProcessingMedia) store(ctx context.Context) error {
thumbExt,
)
// Determine thumbnail content-type from thumb ext.
p.media.Thumbnail.ContentType = getMimeType(thumbExt)
// Copy thumbnail file into storage at path.
thumbsz, err := p.mgr.state.Storage.PutFile(ctx,
p.media.Thumbnail.Path,

View File

@ -84,17 +84,21 @@ func generateThumb(
needBlurhash bool,
) (
outpath string,
mimeType string,
blurhash string,
err error,
) {
var ext string
// Default type is webp.
mimeType = "image/webp"
// Generate thumb output path REPLACING extension.
if i := strings.IndexByte(filepath, '.'); i != -1 {
outpath = filepath[:i] + "_thumb.webp"
ext = filepath[i+1:] // old extension
} else {
return "", "", gtserror.New("input file missing extension")
return "", "", "", gtserror.New("input file missing extension")
}
// Check for the few media types we
@ -106,6 +110,7 @@ func generateThumb(
// Replace the "webp" with "jpeg", as we'll
// use our native Go thumbnailing generation.
outpath = outpath[:len(outpath)-4] + "jpeg"
mimeType = "image/jpeg"
log.Debug(ctx, "generating thumb from jpeg")
blurhash, err := generateNativeThumb(
@ -117,7 +122,7 @@ func generateThumb(
jpeg.Decode,
needBlurhash,
)
return outpath, blurhash, err
return outpath, mimeType, blurhash, err
// We specifically only allow generating native
// thumbnails from gif IF it doesn't contain an
@ -128,6 +133,7 @@ func generateThumb(
// Replace the "webp" with "jpeg", as we'll
// use our native Go thumbnailing generation.
outpath = outpath[:len(outpath)-4] + "jpeg"
mimeType = "image/jpeg"
log.Debug(ctx, "generating thumb from gif")
blurhash, err := generateNativeThumb(
@ -139,7 +145,7 @@ func generateThumb(
gif.Decode,
needBlurhash,
)
return outpath, blurhash, err
return outpath, mimeType, blurhash, err
// We specifically only allow generating native
// thumbnails from png IF it doesn't contain an
@ -150,6 +156,7 @@ func generateThumb(
// Replace the "webp" with "jpeg", as we'll
// use our native Go thumbnailing generation.
outpath = outpath[:len(outpath)-4] + "jpeg"
mimeType = "image/jpeg"
log.Debug(ctx, "generating thumb from png")
blurhash, err := generateNativeThumb(
@ -161,7 +168,7 @@ func generateThumb(
png.Decode,
needBlurhash,
)
return outpath, blurhash, err
return outpath, mimeType, blurhash, err
// We specifically only allow generating native
// thumbnails from webp IF it doesn't contain an
@ -172,6 +179,7 @@ func generateThumb(
// Replace the "webp" with "jpeg", as we'll
// use our native Go thumbnailing generation.
outpath = outpath[:len(outpath)-4] + "jpeg"
mimeType = "image/jpeg"
log.Debug(ctx, "generating thumb from webp")
blurhash, err := generateNativeThumb(
@ -183,7 +191,7 @@ func generateThumb(
webp.Decode,
needBlurhash,
)
return outpath, blurhash, err
return outpath, mimeType, blurhash, err
}
// The fallback for thumbnail generation, which
@ -196,18 +204,18 @@ func generateThumb(
height,
pixfmt,
); err != nil {
return outpath, "", err
return outpath, "", "", err
}
if needBlurhash {
// Generate new blurhash from webp output thumb.
blurhash, err = generateWebpBlurhash(outpath)
if err != nil {
return outpath, "", gtserror.Newf("error generating blurhash: %w", err)
return outpath, "", "", gtserror.Newf("error generating blurhash: %w", err)
}
}
return outpath, blurhash, err
return outpath, mimeType, blurhash, nil
}
// generateNativeThumb generates a thumbnail

View File

@ -18,7 +18,6 @@
package media
import (
"cmp"
"errors"
"fmt"
"io"
@ -28,7 +27,6 @@
"codeberg.org/gruf/go-bytesize"
"codeberg.org/gruf/go-iotools"
"codeberg.org/gruf/go-mimetypes"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
)
@ -87,12 +85,6 @@ func getExtension(path string) string {
return ""
}
// getMimeType looks up the mimetype registered for the file
// extension ext, falling back to generic binary data
// ("application/octet-stream") when no non-empty entry exists.
func getMimeType(ext string) string {
	known := mimetypes.MimeTypes[ext]
	return cmp.Or(known, "application/octet-stream")
}
// drainToTmp drains data from given reader into a new temp file
// and closes it, returning the path of the resulting temp file.
//

View File

@ -1958,7 +1958,7 @@ func (suite *InternalToFrontendTestSuite) TestInstanceV1ToFrontend() {
"image/apng",
"audio/ogg",
"video/ogg",
"audio/x-m4a",
"audio/mp4",
"video/mp4",
"video/quicktime",
"audio/x-ms-wma",
@ -2103,7 +2103,7 @@ func (suite *InternalToFrontendTestSuite) TestInstanceV2ToFrontend() {
"image/apng",
"audio/ogg",
"video/ogg",
"audio/x-m4a",
"audio/mp4",
"video/mp4",
"video/quicktime",
"audio/x-ms-wma",

View File

@ -1,5 +0,0 @@
# go-mimetypes
A generated lookup map of file extensions to mimetypes, from data provided at: https://raw.githubusercontent.com/micnic/mime.json/master/index.json
This allows determining mimetype without relying on OS mimetype lookups.

View File

@ -1,42 +0,0 @@
#!/bin/sh
# Regenerates mime.gen.go: downloads the extension -> mime type JSON table
# and rewrites it as a Go map literal named MimeTypes (package mimetypes).
# Uses curl and gofumpt; the output is written into the current directory.

# Mime types JSON source
URL='https://raw.githubusercontent.com/micnic/mime.json/master/index.json'

# Generated output file
OUT='mime.gen.go'

# Fetch up front. Inside a pipeline curl's exit status is discarded, so a
# failed download would produce an empty map and overwrite the existing file.
JSON=$(curl -fL "$URL") || {
	echo "failed to download ${URL}" >&2
	exit 1
}

# Define intro to file
FILE='
// This is an automatically generated file, do not edit
package mimetypes
// MimeTypes is a map of file extensions to mime types.
var MimeTypes = map[string]string{
'

# Set break on new-line
IFS='
'

# Disable pathname expansion: the unquoted command substitution below is
# only meant to be split into lines, never matched against file names.
set -f

# Keep only the "ext": "mime/type" lines. POSIX character classes are used
# because \s is a GNU extension that not every grep / sed understands.
for line in $(printf '%s\n' "$JSON" | grep -E '".+"[[:space:]]*:[[:space:]]*".+"'); do
	# Trim final whitespace
	line=$(printf '%s\n' "$line" | sed -e 's|[[:space:]]*$||')

	# Ensure it ends in a comma
	[ "${line%,}" = "$line" ] && line="${line},"

	# Add to file
	FILE="${FILE}${line}
"
done

# Add final statement to file
FILE="${FILE}
}
"

# Write to file (printf, as some sh echo builtins interpret backslashes)
printf '%s\n' "$FILE" > "$OUT"

# Check for valid go
gofumpt -w "$OUT"

File diff suppressed because it is too large Load Diff

View File

@ -1,47 +0,0 @@
package mimetypes
import "path"
// PreferredExts defines preferred file
// extensions for input mime types (as there
// can be multiple extensions per mime type).
//
// Keys are looked up through MimeTypes rather than
// written out as string literals, so each key is whatever
// mime type MimeTypes currently holds for that extension.
var PreferredExts = map[string]string{
MimeTypes["mp3"]: "mp3", // audio/mpeg
MimeTypes["mpeg"]: "mpeg", // video/mpeg
}
// GetForFilename looks up the mimetype registered for the
// extension of filename. The boolean result reports whether
// the extension was present in MimeTypes.
func GetForFilename(filename string) (string, bool) {
	// path.Ext includes the leading dot, so an
	// empty result means there is no extension.
	dotted := path.Ext(filename)
	if dotted == "" {
		return "", false
	}

	// Strip the leading dot before the lookup.
	mimeType, known := MimeTypes[dotted[1:]]
	return mimeType, known
}
// GetFileExt returns the file extension to use for mimeType, and
// whether one was found. An entry in PreferredExts always wins.
// Otherwise MimeTypes is searched, and when several extensions
// share the mime type the lexicographically smallest is returned.
func GetFileExt(mimeType string) (string, bool) {
	if ext, ok := PreferredExts[mimeType]; ok {
		return ext, true
	}

	// Map iteration order is not stable in Go, so returning on
	// the first hit could give a different extension from call
	// to call. Scan every entry and keep the smallest match so
	// that the answer is always the same.
	var (
		best  string
		found bool
	)
	for ext, mime := range MimeTypes {
		if mime != mimeType {
			continue
		}
		if !found || ext < best {
			best, found = ext, true
		}
	}
	return best, found
}
// GetFileExts collects every file extension in MimeTypes that
// maps to mimeType. The result is nil when nothing matches, and
// its order is unspecified as it follows map iteration order.
func GetFileExts(mimeType string) []string {
	var matches []string
	for ext := range MimeTypes {
		if MimeTypes[ext] != mimeType {
			continue
		}
		matches = append(matches, ext)
	}
	return matches
}

3
vendor/modules.txt vendored
View File

@ -48,9 +48,6 @@ codeberg.org/gruf/go-maps
# codeberg.org/gruf/go-mempool v0.0.0-20240507125005-cef10d64a760
## explicit; go 1.22.2
codeberg.org/gruf/go-mempool
# codeberg.org/gruf/go-mimetypes v1.2.0
## explicit; go 1.17
codeberg.org/gruf/go-mimetypes
# codeberg.org/gruf/go-mutexes v1.5.1
## explicit; go 1.22.2
codeberg.org/gruf/go-mutexes