Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/Synercoding.FileFormats.Pdf/Content/ContentStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ public ContentStream EndText()
public ContentStream ShowTextTj(byte[] line)
{
InnerStream
.WriteStringLiteral(line)
.WriteStringHex(line)
.Space()
.Write("Tj")
.NewLine();
Expand All @@ -250,7 +250,7 @@ public ContentStream ShowTextTj(byte[] line)
public ContentStream MoveNextLineShowText(byte[] line)
{
InnerStream
.WriteStringLiteral(line)
.WriteStringHex(line)
.Space()
.Write("'")
.NewLine();
Expand Down Expand Up @@ -285,7 +285,7 @@ public ContentStream MoveNextLineShowText(byte[] line, double wordSpacing, doubl
.Space()
.Write(characterSpacing)
.Space()
.WriteStringLiteral(line)
.WriteStringHex(line)
.Space()
.Write("\"")
.NewLine();
Expand Down
82 changes: 64 additions & 18 deletions src/Synercoding.FileFormats.Pdf/Generation/PdfStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -259,16 +259,7 @@ internal PdfStream WriteStringLiteral(string value)
: [.. Encoding.UTF8.Preamble, .. Encoding.UTF8.GetBytes(value)];

foreach (var b in bytes)
{
if (b == '(')
Write('\\').Write('(');
else if (b == ')')
Write('\\').Write(')');
else if (b == '\\')
Write('\\').Write('\\');
else
Write(b);
}
_writeLiteralByte(b);

WriteByte(0x29); // )

Expand All @@ -285,22 +276,77 @@ internal PdfStream WriteStringLiteral(byte[] encodedString)
WriteByte(0x28); // (

foreach (var b in encodedString)
_writeLiteralByte(b);

WriteByte(0x29); // )

return this;
}

/// <summary>
/// Emit the given bytes as a PDF hexadecimal string: a <c>&lt;</c>, two uppercase
/// hexadecimal digits for every input byte, and a closing <c>&gt;</c>.
/// </summary>
/// <remarks>
/// Per ISO 32000-1 §7.3.4.3 a hexadecimal string has neither escape sequences nor
/// end-of-line normalisation, which makes it the right carrier for arbitrary binary
/// payloads (for example CID-encoded show-text operands): each byte is read back
/// exactly as it was written.
/// </remarks>
/// <param name="encodedString">The already-encoded bytes to serialise.</param>
/// <returns>This <see cref="PdfStream"/>, so calls can be chained.</returns>
internal PdfStream WriteStringHex(byte[] encodedString)
{
    const byte openingDelimiter = 0x3C; // <
    const byte closingDelimiter = 0x3E; // >

    WriteByte(openingDelimiter);

    // One two-byte scratch buffer is reused for every input byte.
    Span<byte> hexDigits = stackalloc byte[2];
    for (var index = 0; index < encodedString.Length; index++)
    {
        var current = encodedString[index];

        // High nibble first, then low nibble.
        hexDigits[0] = _hexNibble(current >> 4);
        hexDigits[1] = _hexNibble(current & 0x0F);
        Write(hexDigits);
    }

    WriteByte(closingDelimiter);

    return this;
}

private void _writeLiteralByte(byte b)
{
switch (b)
{
if (b == '(')
case (byte)'(':
Write('\\').Write('(');
else if (b == ')')
break;
case (byte)')':
Write('\\').Write(')');
else if (b == '\\')
break;
case (byte)'\\':
Write('\\').Write('\\');
else
break;
case 0x0A:
Write('\\').Write('n');
break;
case 0x0D:
Write('\\').Write('r');
break;
case 0x09:
Write('\\').Write('t');
break;
case 0x08:
Write('\\').Write('b');
break;
case 0x0C:
Write('\\').Write('f');
break;
default:
WriteByte(b);
break;
}

WriteByte(0x29); // )

return this;
}

/// <summary>
/// Map a nibble value to the ASCII code of its uppercase hexadecimal digit
/// (0-9 give '0'-'9', 10-15 give 'A'-'F').
/// </summary>
/// <param name="n">The nibble value; callers in this file pass 0-15.</param>
/// <returns>The ASCII byte of the corresponding hexadecimal digit.</returns>
private static byte _hexNibble(int n)
{
    if (n < 10)
    {
        return (byte)( '0' + n );
    }

    return (byte)( 'A' + n - 10 );
}

/// <summary>
/// Write an array of numbers to the pdf stream
/// </summary>
Expand Down
119 changes: 119 additions & 0 deletions tests/Synercoding.FileFormats.Pdf.Tests/Content/ContentStreamTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
using Synercoding.FileFormats.Pdf.Content;
using Synercoding.FileFormats.Pdf.Generation;
using Synercoding.FileFormats.Pdf.Generation.Internal;
using Synercoding.FileFormats.Pdf.Primitives;
using System.Text;

namespace Synercoding.FileFormats.Pdf.Tests.Content;

/// <summary>
/// Regression coverage for issue #87: the bytes of a CID-encoded show-text operand have
/// to come out of the serialiser untouched. While literal strings were in use, a 0x0D
/// inside the operand was normalised to 0x0A by the PDF parser (ISO 32000-1 §7.3.4.2),
/// silently shifting the CID lookup and yielding wrong or missing glyphs.
/// </summary>
public class ContentStreamTests : IDisposable
{
    private readonly TableBuilder _tableBuilder;
    private readonly CachedResources _cachedResources;
    private readonly PageResources _pageResources;
    private readonly ContentStream _contentStream;

    /// <summary>
    /// Builds a fresh content stream, and the resources it depends on, for every test.
    /// </summary>
    public ContentStreamTests()
    {
        _tableBuilder = new TableBuilder();
        _cachedResources = new CachedResources(_tableBuilder);
        _pageResources = new PageResources(_tableBuilder, _cachedResources);

        var streamId = _tableBuilder.ReserveId();
        _contentStream = new ContentStream(streamId, _pageResources);
    }

    /// <summary>
    /// Releases the content stream first, then the page resources it was created with.
    /// </summary>
    public void Dispose()
    {
        _contentStream.Dispose();
        _pageResources.Dispose();
    }

    /// <summary>
    /// Decodes everything written to the content stream so far as ASCII text.
    /// </summary>
    private string _readOutputAsAscii()
        => Encoding.ASCII.GetString(_contentStream.InnerStream.ToStreamObject().RawData);

    [Fact]
    public void ShowTextTj_GlyphId0x000D_WritesHexString()
    {
        // The original issue #87 reproduction: capital 'J' in Source Sans Pro is
        // glyph id 13 (0x000D), and those bytes must be written verbatim.
        var operand = new byte[] { 0x00, 0x0D };

        _contentStream.ShowTextTj(operand);

        var output = _readOutputAsAscii();
        Assert.Contains("<000D>", output);
        Assert.Contains("Tj", output);
    }

    [Fact]
    public void ShowTextTj_GlyphIdWithCarriageReturnInHighByte_WritesHexString()
    {
        var operand = new byte[] { 0x0D, 0x42 };

        _contentStream.ShowTextTj(operand);

        var output = _readOutputAsAscii();
        Assert.Contains("<0D42>", output);
    }

    [Fact]
    public void ShowTextTj_ConsecutiveCidsForming0D0A_PreservesAlignment()
    {
        // In a literal string the pair 0D 0A would be folded into one 0x0A, which
        // misaligns every 2-byte CID that follows.
        var operand = new byte[] { 0x01, 0x0D, 0x0A, 0x02 };

        _contentStream.ShowTextTj(operand);

        var output = _readOutputAsAscii();
        Assert.Contains("<010D0A02>", output);
    }

    [Fact]
    public void ShowTextTj_BytesMatchingLiteralDelimiters_AreEmittedAsHex()
    {
        // Bytes equal to '(' / ')' / '\' used to be backslash-escaped for a literal
        // string; in a hex string they are just more hex digits.
        var operand = new byte[] { 0x28, 0x29, 0x5C };

        _contentStream.ShowTextTj(operand);

        var output = _readOutputAsAscii();
        Assert.Contains("<28295C>", output);
        Assert.DoesNotContain("\\(", output);
        Assert.DoesNotContain("\\)", output);
        Assert.DoesNotContain("\\\\", output);
    }

    [Fact]
    public void ShowTextTj_DoesNotEmitLiteralStringDelimiters()
    {
        // Regression guard: the operand is no longer wrapped in ( … ).
        var operand = new byte[] { 0x00, 0x0D };

        _contentStream.ShowTextTj(operand);

        // Inspect the raw bytes rather than decoded text for this check.
        var outputBytes = _contentStream.InnerStream.ToStreamObject().RawData;
        Assert.DoesNotContain((byte)'(', outputBytes);
        Assert.DoesNotContain((byte)')', outputBytes);
    }

    [Fact]
    public void MoveNextLineShowText_GlyphId0x000D_WritesHexString()
    {
        var operand = new byte[] { 0x00, 0x0D };

        _contentStream.MoveNextLineShowText(operand);

        var output = _readOutputAsAscii();
        Assert.Contains("<000D>", output);
        Assert.Contains("'", output);
    }

    [Fact]
    public void MoveNextLineShowText_WithSpacing_WritesHexString()
    {
        var operand = new byte[] { 0x00, 0x0D };

        _contentStream.MoveNextLineShowText(operand, wordSpacing: 1.0, characterSpacing: 2.0);

        var output = _readOutputAsAscii();
        Assert.Contains("<000D>", output);
        Assert.Contains("\"", output);
    }
}
Loading
Loading