Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions lib/pdf/reader/buffer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,30 @@ def token
#: () -> Integer
def find_first_xref_offset
check_size_is_non_zero
@io.seek(-TRAILING_BYTECOUNT, IO::SEEK_END) rescue @io.seek(0)
data = @io.read(TRAILING_BYTECOUNT)

raise MalformedPDFError, "PDF does not contain EOF marker" if data.nil?
# Skip trailing null bytes to find the effective end of the PDF.
# Some generators (e.g. Atos/Fonet) append thousands of null bytes
# after %%EOF. Scan backwards in chunks so a file padded with a huge
# run of nulls doesn't turn into a per-byte seek+read loop.
@io.seek(0, IO::SEEK_END)
end_pos = @io.pos

while end_pos > 0
chunk_size = [TRAILING_BYTECOUNT, end_pos].min
@io.seek(end_pos - chunk_size)
chunk = @io.read(chunk_size)
if chunk && (idx = chunk.rindex(/[^\x00]/))
end_pos = end_pos - chunk_size + idx + 1
break
end
end_pos -= chunk_size
end

start_pos = [end_pos - TRAILING_BYTECOUNT, 0].max
@io.seek(start_pos)
data = @io.read(end_pos - start_pos)

raise MalformedPDFError, "PDF does not contain EOF marker" if data.nil? || data.empty?

# the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
lines = data.split(/[\n\r]+/).reverse
Expand Down
Binary file added spec/data/trailing_null_bytes.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions spec/integrity.yml
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,9 @@ data/tounicode-wrong-type-indirect.pdf:
data/tounicode-wrong-type.pdf:
:bytes: 12219
:md5: 0be721e975bc8ec21eae829e0cddc1af
data/trailing_null_bytes.pdf:
:bytes: 67721
:md5: 6b9b6375ca194fdf8687a44130c54df2
data/truetype-arial.pdf:
:bytes: 1387
:md5: 2b3e4ff85b618d1f4c6b3b5df2631ab0
Expand Down
18 changes: 18 additions & 0 deletions spec/reader/buffer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,24 @@
expect(buffer.find_first_xref_offset).to eql(145)
end
end

context "trailing_null_bytes.pdf (null bytes after the EOF marker)" do
  it "finds the first xref offset" do
    # Use the block form of File.open so the fixture's file handle is closed
    # as soon as the example finishes instead of lingering until GC.
    File.open(pdf_spec_file("trailing_null_bytes")) do |file|
      buffer = PDF::Reader::Buffer.new file

      # The padding after the EOF marker must not prevent the xref offset
      # from being located.
      expect(buffer.find_first_xref_offset).to eql(145)
    end
  end
end

context "when the file is nothing but null bytes" do
  it "raises a MalformedPDFError" do
    # The input consists solely of NUL bytes, so there is no EOF marker to find.
    null_only_io = StringIO.new("\x00" * 10_000)
    null_only_buffer = PDF::Reader::Buffer.new(null_only_io)

    expect do
      null_only_buffer.find_first_xref_offset
    end.to raise_error(PDF::Reader::MalformedPDFError)
  end
end
end

describe PDF::Reader::Buffer, "read method" do
Expand Down