dendrite/mediaapi/routing/download.go

968 lines
34 KiB
Go

// Copyright 2024 New Vector Ltd.
// Copyright 2017 Vector Creations Ltd
//
// SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
// Please see LICENSE files in the repository root for full details.
package routing
import (
"context"
"encoding/json"
"fmt"
"io"
"io/fs"
"mime"
"mime/multipart"
"net/http"
"net/textproto"
"net/url"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"unicode"
"github.com/element-hq/dendrite/mediaapi/fileutils"
"github.com/element-hq/dendrite/mediaapi/storage"
"github.com/element-hq/dendrite/mediaapi/thumbnailer"
"github.com/element-hq/dendrite/mediaapi/types"
"github.com/element-hq/dendrite/setup/config"
"github.com/matrix-org/gomatrixserverlib/fclient"
"github.com/matrix-org/gomatrixserverlib/spec"
"github.com/matrix-org/util"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
)
const mediaIDCharacters = "A-Za-z0-9_=-"
// Note: unfortunately regex.MustCompile() cannot be assigned to a const
var mediaIDRegex = regexp.MustCompile("^[" + mediaIDCharacters + "]+$")
// Regular expressions to help us cope with Content-Disposition parsing
var rfc2183 = regexp.MustCompile(`filename\=utf-8\"(.*)\"`)
var rfc6266 = regexp.MustCompile(`filename\*\=utf-8\'\'(.*)`)
// downloadRequest metadata included in or derivable from a download or thumbnail request
// https://matrix.org/docs/spec/client_server/r0.2.0.html#get-matrix-media-r0-download-servername-mediaid
// http://matrix.org/docs/spec/client_server/r0.2.0.html#get-matrix-media-r0-thumbnail-servername-mediaid
type downloadRequest struct {
MediaMetadata *types.MediaMetadata
IsThumbnailRequest bool
ThumbnailSize types.ThumbnailSize
Logger *log.Entry
DownloadFilename string
multipartResponse bool // whether we need to return a multipart/mixed response (for requests coming in over federation)
fedClient fclient.FederationClient
origin spec.ServerName
}
// Taken from: https://github.com/matrix-org/synapse/blob/c3627d0f99ed5a23479305dc2bd0e71ca25ce2b1/synapse/media/_base.py#L53C1-L84
// A list of all content types that are "safe" to be rendered inline in a browser.
var allowInlineTypes = map[types.ContentType]struct{}{
"text/css": {},
"text/plain": {},
"text/csv": {},
"application/json": {},
"application/ld+json": {},
// We allow some media files deemed as safe, which comes from the matrix-react-sdk.
// https://github.com/matrix-org/matrix-react-sdk/blob/a70fcfd0bcf7f8c85986da18001ea11597989a7c/src/utils/blobs.ts#L51
// SVGs are *intentionally* omitted.
"image/jpeg": {},
"image/gif": {},
"image/png": {},
"image/apng": {},
"image/webp": {},
"image/avif": {},
"video/mp4": {},
"video/webm": {},
"video/ogg": {},
"video/quicktime": {},
"audio/mp4": {},
"audio/webm": {},
"audio/aac": {},
"audio/mpeg": {},
"audio/ogg": {},
"audio/wave": {},
"audio/wav": {},
"audio/x-wav": {},
"audio/x-pn-wav": {},
"audio/flac": {},
"audio/x-flac": {},
}
// Download implements GET /download and GET /thumbnail
// Files from this server (i.e. origin == cfg.ServerName) are served directly
// Files from remote servers (i.e. origin != cfg.ServerName) are cached locally.
// If they are present in the cache, they are served directly.
// If they are not present in the cache, they are obtained from the remote server and
// simultaneously served back to the client and written into the cache.
func Download(
w http.ResponseWriter,
req *http.Request,
origin spec.ServerName,
mediaID types.MediaID,
cfg *config.MediaAPI,
db storage.Database,
client *fclient.Client,
fedClient fclient.FederationClient,
activeRemoteRequests *types.ActiveRemoteRequests,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
isThumbnailRequest bool,
customFilename string,
federationRequest bool,
) {
// This happens if we call Download for a federation request
if federationRequest && origin == "" {
origin = cfg.Matrix.ServerName
}
dReq := &downloadRequest{
MediaMetadata: &types.MediaMetadata{
MediaID: mediaID,
Origin: origin,
},
IsThumbnailRequest: isThumbnailRequest,
Logger: util.GetLogger(req.Context()).WithFields(log.Fields{
"Origin": origin,
"MediaID": mediaID,
}),
DownloadFilename: customFilename,
multipartResponse: federationRequest,
origin: cfg.Matrix.ServerName,
fedClient: fedClient,
}
if dReq.IsThumbnailRequest {
width, err := strconv.Atoi(req.FormValue("width"))
if err != nil {
width = -1
}
height, err := strconv.Atoi(req.FormValue("height"))
if err != nil {
height = -1
}
dReq.ThumbnailSize = types.ThumbnailSize{
Width: width,
Height: height,
ResizeMethod: strings.ToLower(req.FormValue("method")),
}
dReq.Logger.WithFields(log.Fields{
"RequestedWidth": dReq.ThumbnailSize.Width,
"RequestedHeight": dReq.ThumbnailSize.Height,
"RequestedResizeMethod": dReq.ThumbnailSize.ResizeMethod,
})
}
// request validation
if resErr := dReq.Validate(); resErr != nil {
dReq.jsonErrorResponse(w, *resErr)
return
}
metadata, err := dReq.doDownload(
req.Context(), w, cfg, db, client,
activeRemoteRequests, activeThumbnailGeneration,
)
if err != nil {
// If we bubbled up a os.PathError, e.g. no such file or directory, don't send
// it to the client, be more generic.
var perr *fs.PathError
if errors.As(err, &perr) {
dReq.Logger.WithError(err).Error("failed to open file")
dReq.jsonErrorResponse(w, util.JSONResponse{
Code: http.StatusNotFound,
JSON: spec.NotFound("File not found"),
})
return
}
// TODO: Handle the fact we might have started writing the response
dReq.jsonErrorResponse(w, util.JSONResponse{
Code: http.StatusNotFound,
JSON: spec.NotFound("Failed to download: " + err.Error()),
})
return
}
if metadata == nil {
dReq.jsonErrorResponse(w, util.JSONResponse{
Code: http.StatusNotFound,
JSON: spec.NotFound("File not found"),
})
return
}
}
func (r *downloadRequest) jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse) {
// Marshal JSON response into raw bytes to send as the HTTP body
resBytes, err := json.Marshal(res.JSON)
if err != nil {
r.Logger.WithError(err).Error("Failed to marshal JSONResponse")
// this should never fail to be marshalled so drop err to the floor
res = util.MessageResponse(http.StatusNotFound, "Download request failed: "+err.Error())
resBytes, _ = json.Marshal(res.JSON)
}
// Set status code and write the body
w.WriteHeader(res.Code)
r.Logger.WithField("code", res.Code).Tracef("Responding (%d bytes)", len(resBytes))
// we don't really care that much if we fail to write the error response
w.Write(resBytes) // nolint: errcheck
}
// Validate validates the downloadRequest fields
func (r *downloadRequest) Validate() *util.JSONResponse {
if !mediaIDRegex.MatchString(string(r.MediaMetadata.MediaID)) {
return &util.JSONResponse{
Code: http.StatusNotFound,
JSON: spec.NotFound(fmt.Sprintf("mediaId must be a non-empty string using only characters in %v", mediaIDCharacters)),
}
}
// Note: the origin will be validated either by comparison to the configured server name of this homeserver
// or by a DNS SRV record lookup when creating a request for remote files
if r.MediaMetadata.Origin == "" {
return &util.JSONResponse{
Code: http.StatusNotFound,
JSON: spec.NotFound("serverName must be a non-empty string"),
}
}
if r.IsThumbnailRequest {
if r.ThumbnailSize.Width <= 0 || r.ThumbnailSize.Height <= 0 {
return &util.JSONResponse{
Code: http.StatusBadRequest,
JSON: spec.Unknown("width and height must be greater than 0"),
}
}
// Default method to scale if not set
if r.ThumbnailSize.ResizeMethod == "" {
r.ThumbnailSize.ResizeMethod = types.Scale
}
if r.ThumbnailSize.ResizeMethod != types.Crop && r.ThumbnailSize.ResizeMethod != types.Scale {
return &util.JSONResponse{
Code: http.StatusBadRequest,
JSON: spec.Unknown("method must be one of crop or scale"),
}
}
}
return nil
}
func (r *downloadRequest) doDownload(
ctx context.Context,
w http.ResponseWriter,
cfg *config.MediaAPI,
db storage.Database,
client *fclient.Client,
activeRemoteRequests *types.ActiveRemoteRequests,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
) (*types.MediaMetadata, error) {
// check if we have a record of the media in our database
mediaMetadata, err := db.GetMediaMetadata(
ctx, r.MediaMetadata.MediaID, r.MediaMetadata.Origin,
)
if err != nil {
return nil, fmt.Errorf("db.GetMediaMetadata: %w", err)
}
if mediaMetadata == nil {
if r.MediaMetadata.Origin == cfg.Matrix.ServerName {
// If we do not have a record and the origin is local, the file is not found
return nil, nil
}
// If we do not have a record and the origin is remote, we need to fetch it and respond with that file
resErr := r.getRemoteFile(
ctx, client, cfg, db, activeRemoteRequests, activeThumbnailGeneration,
)
if resErr != nil {
return nil, resErr
}
} else {
// If we have a record, we can respond from the local file
r.MediaMetadata = mediaMetadata
}
return r.respondFromLocalFile(
ctx, w, cfg.AbsBasePath, activeThumbnailGeneration,
cfg.MaxThumbnailGenerators, db,
cfg.DynamicThumbnails, cfg.ThumbnailSizes,
)
}
// respondFromLocalFile reads a file from local storage and writes it to the http.ResponseWriter
// If no file was found then returns nil, nil
func (r *downloadRequest) respondFromLocalFile(
ctx context.Context,
w http.ResponseWriter,
absBasePath config.Path,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
maxThumbnailGenerators int,
db storage.Database,
dynamicThumbnails bool,
thumbnailSizes []config.ThumbnailSize,
) (*types.MediaMetadata, error) {
filePath, err := fileutils.GetPathFromBase64Hash(r.MediaMetadata.Base64Hash, absBasePath)
if err != nil {
return nil, fmt.Errorf("fileutils.GetPathFromBase64Hash: %w", err)
}
file, err := os.Open(filePath)
defer file.Close() // nolint: errcheck, staticcheck, megacheck
if err != nil {
return nil, fmt.Errorf("os.Open: %w", err)
}
stat, err := file.Stat()
if err != nil {
return nil, fmt.Errorf("file.Stat: %w", err)
}
if r.MediaMetadata.FileSizeBytes > 0 && int64(r.MediaMetadata.FileSizeBytes) != stat.Size() {
r.Logger.WithFields(log.Fields{
"fileSizeDatabase": r.MediaMetadata.FileSizeBytes,
"fileSizeDisk": stat.Size(),
}).Warn("File size in database and on-disk differ.")
return nil, errors.New("file size in database and on-disk differ")
}
var responseFile *os.File
var responseMetadata *types.MediaMetadata
if r.IsThumbnailRequest {
thumbFile, thumbMetadata, resErr := r.getThumbnailFile(
ctx, types.Path(filePath), activeThumbnailGeneration, maxThumbnailGenerators,
db, dynamicThumbnails, thumbnailSizes,
)
if thumbFile != nil {
defer thumbFile.Close() // nolint: errcheck
}
if resErr != nil {
return nil, resErr
}
if thumbFile == nil {
r.Logger.WithFields(log.Fields{
"UploadName": r.MediaMetadata.UploadName,
"Base64Hash": r.MediaMetadata.Base64Hash,
"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
"ContentType": r.MediaMetadata.ContentType,
}).Trace("No good thumbnail found. Responding with original file.")
responseFile = file
responseMetadata = r.MediaMetadata
} else {
r.Logger.Trace("Responding with thumbnail")
responseFile = thumbFile
responseMetadata = thumbMetadata.MediaMetadata
}
} else {
r.Logger.WithFields(log.Fields{
"UploadName": r.MediaMetadata.UploadName,
"Base64Hash": r.MediaMetadata.Base64Hash,
"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
"ContentType": r.MediaMetadata.ContentType,
}).Trace("Responding with file")
responseFile = file
responseMetadata = r.MediaMetadata
if err = r.addDownloadFilenameToHeaders(w, responseMetadata); err != nil {
return nil, err
}
}
w.Header().Set("Content-Type", string(responseMetadata.ContentType))
w.Header().Set("Content-Length", strconv.FormatInt(int64(responseMetadata.FileSizeBytes), 10))
contentSecurityPolicy := "default-src 'none';" +
" script-src 'none';" +
" plugin-types application/pdf;" +
" style-src 'unsafe-inline';" +
" object-src 'self';"
if !r.multipartResponse {
w.Header().Set("Content-Security-Policy", contentSecurityPolicy)
if _, err = io.Copy(w, responseFile); err != nil {
return nil, fmt.Errorf("io.Copy: %w", err)
}
} else {
var written int64
written, err = multipartResponse(w, r, string(responseMetadata.ContentType), responseFile)
if err != nil {
return nil, err
}
responseMetadata.FileSizeBytes = types.FileSizeBytes(written)
}
return responseMetadata, nil
}
func multipartResponse(w http.ResponseWriter, r *downloadRequest, contentType string, responseFile io.Reader) (int64, error) {
mw := multipart.NewWriter(w)
// Update the header to be multipart/mixed; boundary=$randomBoundary
w.Header().Set("Content-Type", "multipart/mixed; boundary="+mw.Boundary())
w.Header().Del("Content-Length") // let Go handle the content length
defer func() {
if err := mw.Close(); err != nil {
r.Logger.WithError(err).Error("Failed to close multipart writer")
}
}()
// JSON object part
jsonWriter, err := mw.CreatePart(textproto.MIMEHeader{
"Content-Type": {"application/json"},
})
if err != nil {
return 0, fmt.Errorf("failed to create json writer: %w", err)
}
if _, err = jsonWriter.Write([]byte("{}")); err != nil {
return 0, fmt.Errorf("failed to write to json writer: %w", err)
}
// media part
mediaWriter, err := mw.CreatePart(textproto.MIMEHeader{
"Content-Type": {contentType},
})
if err != nil {
return 0, fmt.Errorf("failed to create media writer: %w", err)
}
return io.Copy(mediaWriter, responseFile)
}
func (r *downloadRequest) addDownloadFilenameToHeaders(
w http.ResponseWriter,
responseMetadata *types.MediaMetadata,
) error {
// If the requestor supplied a filename to name the download then
// use that, otherwise use the filename from the response metadata.
filename := string(responseMetadata.UploadName)
if r.DownloadFilename != "" {
filename = r.DownloadFilename
}
if len(filename) == 0 {
w.Header().Set("Content-Disposition", contentDispositionFor(""))
return nil
}
unescaped, err := url.PathUnescape(filename)
if err != nil {
return fmt.Errorf("url.PathUnescape: %w", err)
}
isASCII := true // Is the string ASCII or UTF-8?
quote := `` // Encloses the string (ASCII only)
for i := 0; i < len(unescaped); i++ {
if unescaped[i] > unicode.MaxASCII {
isASCII = false
}
if unescaped[i] == 0x20 || unescaped[i] == 0x3B {
// If the filename contains a space or a semicolon, which
// are special characters in Content-Disposition
quote = `"`
}
}
// We don't necessarily want a full escape as the Content-Disposition
// can take many of the characters that PathEscape would otherwise and
// browser support for encoding is a bit wild, so we'll escape only
// the characters that we know will mess up the parsing of the
// Content-Disposition header elements themselves
unescaped = strings.ReplaceAll(unescaped, `\`, `\\"`)
unescaped = strings.ReplaceAll(unescaped, `"`, `\"`)
disposition := contentDispositionFor(responseMetadata.ContentType)
if isASCII {
// For ASCII filenames, we should only quote the filename if
// it needs to be done, e.g. it contains a space or a character
// that would otherwise be parsed as a control character in the
// Content-Disposition header
w.Header().Set("Content-Disposition", fmt.Sprintf(
`%s; filename=%s%s%s`,
disposition, quote, unescaped, quote,
))
} else {
// For UTF-8 filenames, we quote always, as that's the standard
w.Header().Set("Content-Disposition", fmt.Sprintf(
`%s; filename*=utf-8''%s`,
disposition, url.QueryEscape(unescaped),
))
}
return nil
}
// Note: Thumbnail generation may be ongoing asynchronously.
// If no thumbnail was found then returns nil, nil, nil
func (r *downloadRequest) getThumbnailFile(
ctx context.Context,
filePath types.Path,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
maxThumbnailGenerators int,
db storage.Database,
dynamicThumbnails bool,
thumbnailSizes []config.ThumbnailSize,
) (*os.File, *types.ThumbnailMetadata, error) {
var thumbnail *types.ThumbnailMetadata
var err error
if dynamicThumbnails {
thumbnail, err = r.generateThumbnail(
ctx, filePath, r.ThumbnailSize, activeThumbnailGeneration,
maxThumbnailGenerators, db,
)
if err != nil {
return nil, nil, err
}
}
// If dynamicThumbnails is true but there are too many thumbnails being actively generated, we can fall back
// to trying to use a pre-generated thumbnail
if thumbnail == nil {
var thumbnails []*types.ThumbnailMetadata
thumbnails, err = db.GetThumbnails(
ctx, r.MediaMetadata.MediaID, r.MediaMetadata.Origin,
)
if err != nil {
return nil, nil, fmt.Errorf("db.GetThumbnails: %w", err)
}
// If we get a thumbnailSize, a pre-generated thumbnail would be best but it is not yet generated.
// If we get a thumbnail, we're done.
var thumbnailSize *types.ThumbnailSize
thumbnail, thumbnailSize = thumbnailer.SelectThumbnail(r.ThumbnailSize, thumbnails, thumbnailSizes)
// If dynamicThumbnails is true and we are not over-loaded then we would have generated what was requested above.
// So we don't try to generate a pre-generated thumbnail here.
if thumbnailSize != nil && !dynamicThumbnails {
r.Logger.WithFields(log.Fields{
"Width": thumbnailSize.Width,
"Height": thumbnailSize.Height,
"ResizeMethod": thumbnailSize.ResizeMethod,
}).Debug("Pre-generating thumbnail for immediate response.")
thumbnail, err = r.generateThumbnail(
ctx, filePath, *thumbnailSize, activeThumbnailGeneration,
maxThumbnailGenerators, db,
)
if err != nil {
return nil, nil, err
}
}
}
if thumbnail == nil {
return nil, nil, nil
}
r.Logger = r.Logger.WithFields(log.Fields{
"Width": thumbnail.ThumbnailSize.Width,
"Height": thumbnail.ThumbnailSize.Height,
"ResizeMethod": thumbnail.ThumbnailSize.ResizeMethod,
"FileSizeBytes": thumbnail.MediaMetadata.FileSizeBytes,
"ContentType": thumbnail.MediaMetadata.ContentType,
})
thumbPath := string(thumbnailer.GetThumbnailPath(types.Path(filePath), thumbnail.ThumbnailSize))
thumbFile, err := os.Open(string(thumbPath))
if err != nil {
thumbFile.Close() // nolint: errcheck
return nil, nil, fmt.Errorf("os.Open: %w", err)
}
thumbStat, err := thumbFile.Stat()
if err != nil {
thumbFile.Close() // nolint: errcheck
return nil, nil, fmt.Errorf("thumbFile.Stat: %w", err)
}
if types.FileSizeBytes(thumbStat.Size()) != thumbnail.MediaMetadata.FileSizeBytes {
thumbFile.Close() // nolint: errcheck
return nil, nil, errors.New("thumbnail file sizes on disk and in database differ")
}
return thumbFile, thumbnail, nil
}
func (r *downloadRequest) generateThumbnail(
ctx context.Context,
filePath types.Path,
thumbnailSize types.ThumbnailSize,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
maxThumbnailGenerators int,
db storage.Database,
) (*types.ThumbnailMetadata, error) {
r.Logger.WithFields(log.Fields{
"Width": thumbnailSize.Width,
"Height": thumbnailSize.Height,
"ResizeMethod": thumbnailSize.ResizeMethod,
})
busy, err := thumbnailer.GenerateThumbnail(
ctx, filePath, thumbnailSize, r.MediaMetadata,
activeThumbnailGeneration, maxThumbnailGenerators, db, r.Logger,
)
if err != nil {
return nil, fmt.Errorf("thumbnailer.GenerateThumbnail: %w", err)
}
if busy {
return nil, nil
}
var thumbnail *types.ThumbnailMetadata
thumbnail, err = db.GetThumbnail(
ctx, r.MediaMetadata.MediaID, r.MediaMetadata.Origin,
thumbnailSize.Width, thumbnailSize.Height, thumbnailSize.ResizeMethod,
)
if err != nil {
return nil, fmt.Errorf("db.GetThumbnail: %w", err)
}
return thumbnail, nil
}
// getRemoteFile fetches the remote file and caches it locally
// A hash map of active remote requests to a struct containing a sync.Cond is used to only download remote files once,
// regardless of how many download requests are received.
// Note: The named errorResponse return variable is used in a deferred broadcast of the metadata and error response to waiting goroutines.
func (r *downloadRequest) getRemoteFile(
ctx context.Context,
client *fclient.Client,
cfg *config.MediaAPI,
db storage.Database,
activeRemoteRequests *types.ActiveRemoteRequests,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
) (errorResponse error) {
// Note: getMediaMetadataFromActiveRequest uses mutexes and conditions from activeRemoteRequests
mediaMetadata, resErr := r.getMediaMetadataFromActiveRequest(activeRemoteRequests)
if resErr != nil {
return resErr
} else if mediaMetadata != nil {
// If we got metadata from an active request, we can respond from the local file
r.MediaMetadata = mediaMetadata
} else {
// Note: This is an active request that MUST broadcastMediaMetadata to wake up waiting goroutines!
// Note: broadcastMediaMetadata uses mutexes and conditions from activeRemoteRequests
defer func() {
// Note: errorResponse is the named return variable so we wrap this in a closure to re-evaluate the arguments at defer-time
if err := recover(); err != nil {
r.broadcastMediaMetadata(activeRemoteRequests, errors.New("paniced"))
panic(err)
}
r.broadcastMediaMetadata(activeRemoteRequests, errorResponse)
}()
// check if we have a record of the media in our database
mediaMetadata, err := db.GetMediaMetadata(
ctx, r.MediaMetadata.MediaID, r.MediaMetadata.Origin,
)
if err != nil {
return fmt.Errorf("db.GetMediaMetadata: %w", err)
}
if mediaMetadata == nil {
// If we do not have a record, we need to fetch the remote file first and then respond from the local file
err := r.fetchRemoteFileAndStoreMetadata(
ctx, client,
cfg.AbsBasePath, cfg.MaxFileSizeBytes, db,
cfg.ThumbnailSizes, activeThumbnailGeneration,
cfg.MaxThumbnailGenerators,
)
if err != nil {
r.Logger.WithError(err).Errorf("r.fetchRemoteFileAndStoreMetadata: failed to fetch remote file")
return err
}
} else {
// If we have a record, we can respond from the local file
r.MediaMetadata = mediaMetadata
}
}
return nil
}
func (r *downloadRequest) getMediaMetadataFromActiveRequest(activeRemoteRequests *types.ActiveRemoteRequests) (*types.MediaMetadata, error) {
// Check if there is an active remote request for the file
mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID)
activeRemoteRequests.Lock()
defer activeRemoteRequests.Unlock()
if activeRemoteRequestResult, ok := activeRemoteRequests.MXCToResult[mxcURL]; ok {
r.Logger.Trace("Waiting for another goroutine to fetch the remote file.")
// NOTE: Wait unlocks and locks again internally. There is still a deferred Unlock() that will unlock this.
activeRemoteRequestResult.Cond.Wait()
if activeRemoteRequestResult.Error != nil {
return nil, activeRemoteRequestResult.Error
}
if activeRemoteRequestResult.MediaMetadata == nil {
return nil, nil
}
return activeRemoteRequestResult.MediaMetadata, nil
}
// No active remote request so create one
activeRemoteRequests.MXCToResult[mxcURL] = &types.RemoteRequestResult{
Cond: &sync.Cond{L: activeRemoteRequests},
}
return nil, nil
}
// broadcastMediaMetadata broadcasts the media metadata and error response to waiting goroutines
// Only the owner of the activeRemoteRequestResult for this origin and media ID should call this function.
func (r *downloadRequest) broadcastMediaMetadata(activeRemoteRequests *types.ActiveRemoteRequests, err error) {
activeRemoteRequests.Lock()
defer activeRemoteRequests.Unlock()
mxcURL := "mxc://" + string(r.MediaMetadata.Origin) + "/" + string(r.MediaMetadata.MediaID)
if activeRemoteRequestResult, ok := activeRemoteRequests.MXCToResult[mxcURL]; ok {
r.Logger.Trace("Signalling other goroutines waiting for this goroutine to fetch the file.")
activeRemoteRequestResult.MediaMetadata = r.MediaMetadata
activeRemoteRequestResult.Error = err
activeRemoteRequestResult.Cond.Broadcast()
}
delete(activeRemoteRequests.MXCToResult, mxcURL)
}
// fetchRemoteFileAndStoreMetadata fetches the file from the remote server and stores its metadata in the database
func (r *downloadRequest) fetchRemoteFileAndStoreMetadata(
ctx context.Context,
client *fclient.Client,
absBasePath config.Path,
maxFileSizeBytes config.FileSizeBytes,
db storage.Database,
thumbnailSizes []config.ThumbnailSize,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
maxThumbnailGenerators int,
) error {
finalPath, duplicate, err := r.fetchRemoteFile(
ctx, client, absBasePath, maxFileSizeBytes,
)
if err != nil {
return err
}
r.Logger.WithFields(log.Fields{
"Base64Hash": r.MediaMetadata.Base64Hash,
"UploadName": r.MediaMetadata.UploadName,
"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
"ContentType": r.MediaMetadata.ContentType,
}).Debug("Storing file metadata to media repository database")
// FIXME: timeout db request
if err := db.StoreMediaMetadata(ctx, r.MediaMetadata); err != nil {
// If the file is a duplicate (has the same hash as an existing file) then
// there is valid metadata in the database for that file. As such we only
// remove the file if it is not a duplicate.
if !duplicate {
finalDir := filepath.Dir(string(finalPath))
fileutils.RemoveDir(types.Path(finalDir), r.Logger)
}
// NOTE: It should really not be possible to fail the uniqueness test here so
// there is no need to handle that separately
return errors.New("failed to store file metadata in DB")
}
go func() {
busy, err := thumbnailer.GenerateThumbnails(
context.Background(), finalPath, thumbnailSizes, r.MediaMetadata,
activeThumbnailGeneration, maxThumbnailGenerators, db, r.Logger,
)
if err != nil {
r.Logger.WithError(err).Warn("Error generating thumbnails")
}
if busy {
r.Logger.Warn("Maximum number of active thumbnail generators reached. Skipping pre-generation.")
}
}()
r.Logger.WithFields(log.Fields{
"UploadName": r.MediaMetadata.UploadName,
"Base64Hash": r.MediaMetadata.Base64Hash,
"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
"ContentType": r.MediaMetadata.ContentType,
}).Debug("Remote file cached")
return nil
}
func (r *downloadRequest) GetContentLengthAndReader(contentLengthHeader string, reader io.ReadCloser, maxFileSizeBytes config.FileSizeBytes) (int64, io.Reader, error) {
var contentLength int64
if contentLengthHeader != "" {
// A Content-Length header is provided. Let's try to parse it.
parsedLength, parseErr := strconv.ParseInt(contentLengthHeader, 10, 64)
if parseErr != nil {
r.Logger.WithError(parseErr).Warn("Failed to parse content length")
return 0, nil, fmt.Errorf("strconv.ParseInt: %w", parseErr)
}
if maxFileSizeBytes > 0 && parsedLength > int64(maxFileSizeBytes) {
return 0, nil, fmt.Errorf(
"remote file size (%d bytes) exceeds locally configured max media size (%d bytes)",
parsedLength, maxFileSizeBytes,
)
}
// We successfully parsed the Content-Length, so we'll return a limited
// reader that restricts us to reading only up to this size.
reader = io.NopCloser(io.LimitReader(reader, parsedLength))
contentLength = parsedLength
} else {
// Content-Length header is missing. If we have a maximum file size
// configured then we'll just make sure that the reader is limited to
// that size. We'll return a zero content length, but that's OK, since
// ultimately it will get rewritten later when the temp file is written
// to disk.
if maxFileSizeBytes > 0 {
reader = io.NopCloser(io.LimitReader(reader, int64(maxFileSizeBytes)))
}
contentLength = 0
}
return contentLength, reader, nil
}
// mediaMeta contains information about a multipart media response.
// TODO: extend once something is defined.
type mediaMeta struct{}
// nolint: gocyclo
func (r *downloadRequest) fetchRemoteFile(
ctx context.Context,
client *fclient.Client,
absBasePath config.Path,
maxFileSizeBytes config.FileSizeBytes,
) (types.Path, bool, error) {
r.Logger.Debug("Fetching remote file")
// Attempt to download via authenticated media endpoint
isAuthed := true
resp, err := r.fedClient.DownloadMedia(ctx, r.origin, r.MediaMetadata.Origin, string(r.MediaMetadata.MediaID))
if err != nil || (resp != nil && resp.StatusCode != http.StatusOK) {
isAuthed = false
// try again on the unauthed endpoint
// create request for remote file
resp, err = client.CreateMediaDownloadRequest(ctx, r.MediaMetadata.Origin, string(r.MediaMetadata.MediaID))
if err != nil || (resp != nil && resp.StatusCode != http.StatusOK) {
if resp != nil && resp.StatusCode == http.StatusNotFound {
return "", false, fmt.Errorf("File with media ID %q does not exist on %s", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)
}
return "", false, fmt.Errorf("file with media ID %q could not be downloaded from %s: %w", r.MediaMetadata.MediaID, r.MediaMetadata.Origin, err)
}
}
defer resp.Body.Close() // nolint: errcheck
// If this wasn't a multipart response, set the Content-Type now. Will be overwritten
// by the multipart Content-Type below.
r.MediaMetadata.ContentType = types.ContentType(resp.Header.Get("Content-Type"))
var contentLength int64
var reader io.Reader
var parseErr error
if isAuthed {
contentLength, reader, parseErr = parseMultipartResponse(r, resp, maxFileSizeBytes)
} else {
// The reader returned here will be limited either by the Content-Length
// and/or the configured maximum media size.
contentLength, reader, parseErr = r.GetContentLengthAndReader(resp.Header.Get("Content-Length"), resp.Body, maxFileSizeBytes)
}
if parseErr != nil {
return "", false, parseErr
}
if maxFileSizeBytes > 0 && contentLength > int64(maxFileSizeBytes) {
// TODO: Bubble up this as a 413
return "", false, fmt.Errorf("remote file is too large (%v > %v bytes)", contentLength, maxFileSizeBytes)
}
r.MediaMetadata.FileSizeBytes = types.FileSizeBytes(contentLength)
dispositionHeader := resp.Header.Get("Content-Disposition")
if _, params, e := mime.ParseMediaType(dispositionHeader); e == nil {
if params["filename"] != "" {
r.MediaMetadata.UploadName = types.Filename(params["filename"])
} else if params["filename*"] != "" {
r.MediaMetadata.UploadName = types.Filename(params["filename*"])
}
} else {
if matches := rfc6266.FindStringSubmatch(dispositionHeader); len(matches) > 1 {
// Always prefer the RFC6266 UTF-8 name if possible
r.MediaMetadata.UploadName = types.Filename(matches[1])
} else if matches := rfc2183.FindStringSubmatch(dispositionHeader); len(matches) > 1 {
// Otherwise, see if an RFC2183 name was provided (ASCII only)
r.MediaMetadata.UploadName = types.Filename(matches[1])
}
}
r.Logger.Trace("Transferring remote file")
// The file data is hashed but is NOT used as the MediaID, unlike in Upload. The hash is useful as a
// method of deduplicating files to save storage, as well as a way to conduct
// integrity checks on the file data in the repository.
// Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK.
hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(ctx, reader, absBasePath)
if err != nil {
r.Logger.WithError(err).WithFields(log.Fields{
"MaxFileSizeBytes": maxFileSizeBytes,
}).Warn("Error while downloading file from remote server")
return "", false, errors.New("file could not be downloaded from remote server")
}
r.Logger.Trace("Remote file transferred")
// It's possible the bytesWritten to the temporary file is different to the reported Content-Length from the remote
// request's response. bytesWritten is therefore used as it is what would be sent to clients when reading from the local
// file.
r.MediaMetadata.FileSizeBytes = types.FileSizeBytes(bytesWritten)
r.MediaMetadata.Base64Hash = hash
// The database is the source of truth so we need to have moved the file first
finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
if err != nil {
return "", false, fmt.Errorf("fileutils.MoveFileWithHashCheck: %w", err)
}
if duplicate {
r.Logger.WithField("dst", finalPath).Trace("File was stored previously - discarding duplicate")
// Continue on to store the metadata in the database
}
return types.Path(finalPath), duplicate, nil
}
func parseMultipartResponse(r *downloadRequest, resp *http.Response, maxFileSizeBytes config.FileSizeBytes) (int64, io.Reader, error) {
_, params, err := mime.ParseMediaType(resp.Header.Get("Content-Type"))
if err != nil {
return 0, nil, err
}
if params["boundary"] == "" {
return 0, nil, fmt.Errorf("no boundary header found on media %s from %s", r.MediaMetadata.MediaID, r.MediaMetadata.Origin)
}
mr := multipart.NewReader(resp.Body, params["boundary"])
// Get the first, JSON, part
p, err := mr.NextPart()
if err != nil {
return 0, nil, err
}
defer p.Close() // nolint: errcheck
if p.Header.Get("Content-Type") != "application/json" {
return 0, nil, fmt.Errorf("first part of the response must be application/json")
}
// Try to parse media meta information
meta := mediaMeta{}
if err = json.NewDecoder(p).Decode(&meta); err != nil {
return 0, nil, err
}
defer p.Close() // nolint: errcheck
// Get the actual media content
p, err = mr.NextPart()
if err != nil {
return 0, nil, err
}
redirect := p.Header.Get("Location")
if redirect != "" {
return 0, nil, fmt.Errorf("Location header is not yet supported")
}
contentLength, reader, err := r.GetContentLengthAndReader(p.Header.Get("Content-Length"), p, maxFileSizeBytes)
// For multipart requests, we need to get the Content-Type of the second part, which is the actual media
r.MediaMetadata.ContentType = types.ContentType(p.Header.Get("Content-Type"))
return contentLength, reader, err
}
// contentDispositionFor returns the Content-Disposition for a given
// content type.
func contentDispositionFor(contentType types.ContentType) string {
if _, ok := allowInlineTypes[contentType]; ok {
return "inline"
}
return "attachment"
}