From 52ccc6ecc3afa8e36668c646999483f60a893d43 Mon Sep 17 00:00:00 2001
From: invario <67800603+invario@users.noreply.github.com>
Date: Wed, 16 Jul 2025 09:50:30 -0400
Subject: [PATCH] feat(previews): previews for large remote files without full file download

Co-authored-by: Kate <26026535+provokateurin@users.noreply.github.com>
Signed-off-by: invario <67800603+invario@users.noreply.github.com>
---
 lib/private/Preview/Movie.php | 176 ++++++++++++++++++++++++++++++----
 1 file changed, 164 insertions(+), 12 deletions(-)

diff --git a/lib/private/Preview/Movie.php b/lib/private/Preview/Movie.php
index 47895f999d810..82ebfd28fae94 100644
--- a/lib/private/Preview/Movie.php
+++ b/lib/private/Preview/Movie.php
@@ -54,22 +54,43 @@ public function getThumbnail(File $file, int $maxX, int $maxY): ?IImage {
 
 		$result = null;
 		if ($this->useTempFile($file)) {
-			// Try downloading 5 MB first, as it's likely that the first frames are present there.
-			// In some cases this doesn't work, for example when the moov atom is at the
-			// end of the file, so if it fails we fall back to getting the full file.
-			// Unless the file is not local (e.g. S3) as we do not want to download the whole (e.g. 37Gb) file
+			// Try downloading 10 MB first, as it's likely that the first needed frames are present
+			// there along with the 'moov' atom (used in MP4/MOV files). In some cases this doesn't
+			// work (e.g. the 'moov' atom is at the end, or the video has a high bitrate)
 			if ($file->getStorage()->isLocal()) {
-				$sizeAttempts = [5242880, null];
+				// File is local, make two attempts: 10 MB, then the entire file
+				// Also, set attempts for timestamp at 5, 1, and 0 seconds
+				$sizeAttempts = [10485760, null];
+				$timeAttempts = [5, 1, 0];
 			} else {
-				$sizeAttempts = [5242880];
+				// File is remote, make one attempt: 10 MB will be downloaded from the file along with
+				// the 'moov' atom.
+				// Also, set attempts for timestamp at 1 and 0 seconds only due to less video data.
+				// WARNING: setting the time attempts to higher values will generate corrupt previews
+				// especially on higher bitrate videos.
+				// Example bitrates in the higher range:
+				// 4K HDR H265 60 FPS = 75 Mbps = 9 MB per second needed for a still
+				// 1080p H265 30 FPS = 10 Mbps = 1.25 MB per second needed for a still
+				// 1080p H264 30 FPS = 16 Mbps = 2 MB per second needed for a still
+				$sizeAttempts = [10485760];
+				$timeAttempts = [1, 0];
 			}
 		} else {
 			// size is irrelevant, only attempt once
 			$sizeAttempts = [null];
+			$timeAttempts = [5, 1, 0];
 		}
 
 		foreach ($sizeAttempts as $size) {
-			$absPath = $this->getLocalFile($file, $size);
+			$absPath = false;
+			// File is remote, generate a sparse file
+			if (!$file->getStorage()->isLocal()) {
+				$absPath = $this->getSparseFile($file, $size);
+			}
+			// Defaults to existing routine if generating sparse file fails
+			if ($absPath === false) {
+				$absPath = $this->getLocalFile($file, $size);
+			}
 			if ($absPath === false) {
 				Server::get(LoggerInterface::class)->error(
 					'Failed to get local file to generate thumbnail for: ' . $file->getPath(),
@@ -78,14 +99,14 @@ public function getThumbnail(File $file, int $maxX, int $maxY): ?IImage {
 				return null;
 			}
 
-			$result = $this->generateThumbNail($maxX, $maxY, $absPath, 5);
-			if ($result === null) {
-				$result = $this->generateThumbNail($maxX, $maxY, $absPath, 1);
-				if ($result === null) {
-					$result = $this->generateThumbNail($maxX, $maxY, $absPath, 0);
+			// Attempt still image grabs from selected timestamps
+			foreach ($timeAttempts as $timeStamp) {
+				$result = $this->generateThumbNail($maxX, $maxY, $absPath, $timeStamp);
+				if ($result !== null) {
+					break;
 				}
 			}
 
 			$this->cleanTmpFiles();
 
 			if ($result !== null) {
@@ -95,6 +116,137 @@ public function getThumbnail(File $file, int $maxX, int $maxY): ?IImage {
 
 		return $result;
 	}
+
+	/**
+	 * Create a sparse local copy of a remote MP4/MOV video that holds only the first
+	 * $size bytes plus the 'moov' atom, so a still image can be grabbed without
+	 * downloading the whole file.
+	 *
+	 * Files that are not larger than $size are copied completely.
+	 *
+	 * @param File $file the video to create the sparse copy of
+	 * @param int $size number of bytes to copy from the start of the video
+	 * @return string|false absolute path of the temporary file, or false when no usable
+	 *                      sparse file could be created (caller falls back to getLocalFile())
+	 */
+	private function getSparseFile(File $file, int $size): string|false {
+		$absPath = Server::get(ITempManager::class)->getTemporaryFile();
+		if ($absPath === false) {
+			Server::get(LoggerInterface::class)->error(
+				'Failed to get sparse file to generate thumbnail for: ' . $file->getPath(),
+				['app' => 'core']
+			);
+			return false;
+		}
+
+		$content = $file->fopen('r');
+		if ($content === false) {
+			return false;
+		}
+
+		$sparseFile = false;
+		try {
+			// Stream does not support seeking so generating a sparse file is not possible.
+			if (stream_get_meta_data($content)['seekable'] === false) {
+				return false;
+			}
+
+			$sparseFile = fopen($absPath, 'w');
+			if ($sparseFile === false) {
+				return false;
+			}
+
+			$fileSize = $file->getSize();
+			// If video size is less than or equal to $size then just download entire file
+			if ($size >= $fileSize) {
+				return stream_copy_to_stream($content, $sparseFile) === false ? false : $absPath;
+			}
+
+			// MP4/MOV files always begin with the 'ftyp' atom. Anything else is not MP4/MOV
+			// and therefore has to be processed differently.
+			if (stream_get_contents($content, 4, 4) !== 'ftyp') {
+				return false;
+			}
+
+			// Every atom starts with an 8 byte header: a 4 byte big-endian size followed by
+			// a 4 byte label. Atoms begin where the previous one ends, so walk from header
+			// to header until the 'moov' atom is found or EOF is reached.
+			$offset = 0;
+			$moovOffset = 0;
+			$moovSize = 0;
+			while ($offset + 8 < $fileSize) {
+				$header = stream_get_contents($content, 8, $offset);
+				if ($header === false || strlen($header) < 8) {
+					return false;
+				}
+				$atomSize = hexdec(bin2hex(substr($header, 0, 4)));
+				$atomLabel = substr($header, 4, 4);
+				$headerSize = 8;
+				// Size has two special values that don't directly indicate the size
+				if ($atomSize === 0) {
+					// 0 = atom extends to the end of the file
+					$atomSize = $fileSize - $offset;
+				} elseif ($atomSize === 1) {
+					// 1 = the real size is stored as 64 bit value in the 8 bytes after the
+					// label. Needed for large atoms like 'mdat' (the video data)
+					$extendedSize = stream_get_contents($content, 8, $offset + 8);
+					if ($extendedSize === false || strlen($extendedSize) < 8) {
+						return false;
+					}
+					$atomSize = hexdec(bin2hex($extendedSize));
+					$headerSize = 16;
+				}
+				// An atom can't be smaller than its own header. Such a value is corrupt and
+				// could keep this loop from ever advancing.
+				if ($atomSize < $headerSize) {
+					return false;
+				}
+				// Found the 'moov' atom, store its location and size
+				if ($atomLabel === 'moov') {
+					$moovOffset = $offset;
+					$moovSize = $atomSize;
+					break;
+				}
+				$offset += $atomSize;
+			}
+
+			// 'moov' atom wasn't found or is larger than $size.
+			// 'moov' atoms are generally small relative to video length.
+			// Examples:
+			// 4K HDR H265 60 FPS, 10 second video = 12.5 KB 'moov' atom, 54 MB total file size
+			// 4K HDR H265 60 FPS, 5 minute video = 330 KB 'moov' atom, 1.95 GB total file size
+			// Capping it at $size is a precaution against a corrupt/malicious 'moov' atom.
+			// Also, if the 'moov' atom size+offset extends past EOF, it is invalid.
+			if ($moovSize === 0 || $moovSize > $size || $moovOffset + $moovSize > $fileSize) {
+				return false;
+			}
+
+			// Generate new file of same size, then copy the first $size bytes of the video
+			if (!ftruncate($sparseFile, $fileSize)
+				|| fseek($content, 0) !== 0
+				|| stream_copy_to_stream($content, $sparseFile, $size) === false) {
+				return false;
+			}
+
+			// The copy above only contains the complete 'moov' atom when the atom ends
+			// within the first $size bytes. Otherwise download it to its original offset.
+			if ($moovOffset + $moovSize > $size) {
+				if (fseek($content, $moovOffset) !== 0
+					|| fseek($sparseFile, $moovOffset) !== 0
+					|| stream_copy_to_stream($content, $sparseFile, $moovSize) === false) {
+					return false;
+				}
+			}
+
+			return $absPath;
+		} finally {
+			// Always release both handles, also on the early returns above
+			fclose($content);
+			if ($sparseFile !== false) {
+				fclose($sparseFile);
+			}
+		}
+	}
 
 	private function useHdr(string $absPath): bool {
 		// load ffprobe path from configuration, otherwise generate binary path using ffmpeg binary path