Skip to content

Commit 9acf3a1

Browse files
committed
Handle WebP Exif with preamble
Some WebP files seem to have the JPEG Exif preamble. It may be an error, but it's safe to recover from it. Fixes the .NET side of drewnoakes/metadata-extractor#473.
1 parent 3aa6bbd commit 9acf3a1

File tree

3 files changed

+17
-3
lines changed

3 files changed

+17
-3
lines changed

MetadataExtractor/Formats/Exif/ExifReader.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,27 @@ namespace MetadataExtractor.Formats.Exif
2424
/// <author>Drew Noakes https://drewnoakes.com</author>
2525
public sealed class ExifReader : IJpegSegmentMetadataReader
2626
{
27-
/// <summary>Exif data stored in JPEG files' APP1 segment are preceded by this six character preamble.</summary>
27+
/// <summary>Exif data stored in JPEG files' APP1 segment are preceded by this six character preamble "Exif\0\0".</summary>
2828
public const string JpegSegmentPreamble = "Exif\x0\x0";
2929

3030
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.App1 };
3131

3232
public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
3333
{
3434
return segments
35-
.Where(segment => segment.Bytes.Length >= JpegSegmentPreamble.Length && Encoding.UTF8.GetString(segment.Bytes, 0, JpegSegmentPreamble.Length) == JpegSegmentPreamble)
35+
.Where(segment => StartsWithJpegExifPreamble(segment.Bytes))
3636
.SelectMany(segment => Extract(new ByteArrayReader(segment.Bytes, baseOffset: JpegSegmentPreamble.Length)))
3737
.ToList();
3838
}
3939

40+
/// <summary>
41+
/// Indicates whether <paramref name="bytes"/> starts with <see cref="JpegSegmentPreamble"/>.
42+
/// </summary>
43+
public static bool StartsWithJpegExifPreamble(byte[] bytes)
44+
{
45+
return bytes.Length >= JpegSegmentPreamble.Length && Encoding.UTF8.GetString(bytes, 0, JpegSegmentPreamble.Length) == JpegSegmentPreamble;
46+
}
47+
4048
/// <summary>
4149
/// Reads TIFF formatted Exif data at a specified offset within an <see cref="IndexedReader"/>.
4250
/// </summary>

MetadataExtractor/Formats/WebP/WebpRiffHandler.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,12 @@ public void ProcessChunk(string fourCc, byte[] payload)
5050
{
5151
case "EXIF":
5252
{
53-
_directories.AddRange(new ExifReader().Extract(new ByteArrayReader(payload)));
53+
// We have seen WebP images with and without the preamble here. It's likely that some software incorrectly
54+
// copied an entire JPEG segment into the WebP image. Regardless, we can handle it here.
55+
var reader = ExifReader.StartsWithJpegExifPreamble(payload)
56+
? new ByteArrayReader(payload, ExifReader.JpegSegmentPreamble.Length)
57+
: new ByteArrayReader(payload);
58+
_directories.AddRange(new ExifReader().Extract(reader));
5459
break;
5560
}
5661
case "ICCP":
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
#nullable enable
2+
static MetadataExtractor.Formats.Exif.ExifReader.StartsWithJpegExifPreamble(byte[]! bytes) -> bool

0 commit comments

Comments
 (0)