diff --git a/src/Synercoding.FileFormats.Pdf/Content/ContentStream.cs b/src/Synercoding.FileFormats.Pdf/Content/ContentStream.cs
index eab9d1b..2c67cc4 100644
--- a/src/Synercoding.FileFormats.Pdf/Content/ContentStream.cs
+++ b/src/Synercoding.FileFormats.Pdf/Content/ContentStream.cs
@@ -234,7 +234,7 @@ public ContentStream EndText()
public ContentStream ShowTextTj(byte[] line)
{
InnerStream
- .WriteStringLiteral(line)
+ .WriteStringHex(line)
.Space()
.Write("Tj")
.NewLine();
@@ -250,7 +250,7 @@ public ContentStream ShowTextTj(byte[] line)
public ContentStream MoveNextLineShowText(byte[] line)
{
InnerStream
- .WriteStringLiteral(line)
+ .WriteStringHex(line)
.Space()
.Write("'")
.NewLine();
@@ -285,7 +285,7 @@ public ContentStream MoveNextLineShowText(byte[] line, double wordSpacing, doubl
.Space()
.Write(characterSpacing)
.Space()
- .WriteStringLiteral(line)
+ .WriteStringHex(line)
.Space()
.Write("\"")
.NewLine();
diff --git a/src/Synercoding.FileFormats.Pdf/Generation/PdfStream.cs b/src/Synercoding.FileFormats.Pdf/Generation/PdfStream.cs
index e6cbd12..378ffb6 100644
--- a/src/Synercoding.FileFormats.Pdf/Generation/PdfStream.cs
+++ b/src/Synercoding.FileFormats.Pdf/Generation/PdfStream.cs
@@ -259,16 +259,7 @@ internal PdfStream WriteStringLiteral(string value)
: [.. Encoding.UTF8.Preamble, .. Encoding.UTF8.GetBytes(value)];
foreach (var b in bytes)
- {
- if (b == '(')
- Write('\\').Write('(');
- else if (b == ')')
- Write('\\').Write(')');
- else if (b == '\\')
- Write('\\').Write('\\');
- else
- Write(b);
- }
+ _writeLiteralByte(b);
WriteByte(0x29); // )
@@ -285,22 +276,77 @@ internal PdfStream WriteStringLiteral(byte[] encodedString)
WriteByte(0x28); // (
foreach (var b in encodedString)
+ _writeLiteralByte(b);
+
+ WriteByte(0x29); // )
+
+ return this;
+ }
+
+    /// <summary>
+    /// Write an encoded byte sequence to the stream as a PDF hexadecimal string.
+    /// </summary>
+    /// <remarks>
+    /// Hexadecimal strings (ISO 32000-1 §7.3.4.3) are the correct container for arbitrary
+    /// binary data such as CID-encoded show-text operands: they have no escape rules and
+    /// no end-of-line normalisation, so every byte round-trips exactly.
+    /// </remarks>
+    /// <param name="encodedString">The bytes to write.</param>
+    /// <returns>The <see cref="PdfStream"/> to support chaining operations.</returns>
+    internal PdfStream WriteStringHex(byte[] encodedString)
+    {
+        WriteByte(0x3C); // <
+
+        Span<byte> pair = stackalloc byte[2]; // scratch reused per byte: [high digit, low digit]
+        foreach (var b in encodedString)
+        {
+            pair[0] = _hexNibble(b >> 4);   // upper four bits
+            pair[1] = _hexNibble(b & 0x0F); // lower four bits
+            Write(pair);
+        }
+
+        WriteByte(0x3E); // >
+
+        return this;
+    }
+
+ private void _writeLiteralByte(byte b) // writes one literal-string byte; the cases below become two-character backslash escapes
+ {
+ switch (b)
 {
- if (b == '(')
+ case (byte)'(':
 Write('\\').Write('(');
- else if (b == ')')
+ break;
+ case (byte)')':
 Write('\\').Write(')');
- else if (b == '\\')
+ break;
+ case (byte)'\\':
 Write('\\').Write('\\');
- else
+ break;
+ case 0x0A: // line feed -> \n
+ Write('\\').Write('n');
+ break;
+ case 0x0D: // carriage return -> \r
+ Write('\\').Write('r');
+ break;
+ case 0x09: // horizontal tab -> \t
+ Write('\\').Write('t');
+ break;
+ case 0x08: // backspace -> \b
+ Write('\\').Write('b');
+ break;
+ case 0x0C: // form feed -> \f
+ Write('\\').Write('f');
+ break;
+ default: // every other byte is passed to WriteByte unchanged
 WriteByte(b);
+ break;
 }
-
- WriteByte(0x29); // )
-
- return this;
 }
+    private static byte _hexNibble(int n) // 0-9 -> ASCII '0'-'9', 10-15 -> ASCII 'A'-'F'
+        => n < 10 ? (byte)( '0' + n ) : (byte)( 'A' + ( n - 10 ) );
+
///
/// Write an array of numbers to the pdf stream
///
diff --git a/tests/Synercoding.FileFormats.Pdf.Tests/Content/ContentStreamTests.cs b/tests/Synercoding.FileFormats.Pdf.Tests/Content/ContentStreamTests.cs
new file mode 100644
index 0000000..d2f44db
--- /dev/null
+++ b/tests/Synercoding.FileFormats.Pdf.Tests/Content/ContentStreamTests.cs
@@ -0,0 +1,119 @@
+using Synercoding.FileFormats.Pdf.Content;
+using Synercoding.FileFormats.Pdf.Generation;
+using Synercoding.FileFormats.Pdf.Generation.Internal;
+using Synercoding.FileFormats.Pdf.Primitives;
+using System.Text;
+
+namespace Synercoding.FileFormats.Pdf.Tests.Content;
+
+/// <summary>
+/// Regression tests for issue #87 — CID-encoded show-text operands must survive the
+/// serialiser unchanged. When literal strings were used, bytes containing 0x0D were
+/// normalised to 0x0A by the PDF parser (ISO 32000-1 §7.3.4.2), which silently
+/// shifted the CID lookup and produced wrong or missing glyphs.
+/// </summary>
+public class ContentStreamTests : IDisposable
+{
+    private readonly TableBuilder _tableBuilder;
+    private readonly CachedResources _cachedResources;
+    private readonly PageResources _pageResources;
+    private readonly ContentStream _contentStream;
+
+    public ContentStreamTests() // builds a fresh content stream and its supporting objects
+    {
+        _tableBuilder = new TableBuilder();
+        _cachedResources = new CachedResources(_tableBuilder);
+        _pageResources = new PageResources(_tableBuilder, _cachedResources);
+        _contentStream = new ContentStream(_tableBuilder.ReserveId(), _pageResources);
+    }
+
+    public void Dispose() // releases the stream first, then the page resources it was built with
+    {
+        _contentStream.Dispose();
+        _pageResources.Dispose();
+    }
+
+    [Fact]
+    public void ShowTextTj_GlyphId0x000D_WritesHexString()
+    {
+        // Reproduces the original issue #87 case: Source Sans Pro capital 'J'
+        // maps to glyph id 13 (0x000D). The bytes must round-trip verbatim.
+        _contentStream.ShowTextTj(new byte[] { 0x00, 0x0D });
+
+        string text = Encoding.ASCII.GetString(_contentStream.InnerStream.ToStreamObject().RawData);
+
+        Assert.Contains("<000D>", text);
+        Assert.Contains("Tj", text);
+    }
+
+    [Fact]
+    public void ShowTextTj_GlyphIdWithCarriageReturnInHighByte_WritesHexString()
+    {
+        _contentStream.ShowTextTj(new byte[] { 0x0D, 0x42 });
+
+        string text = Encoding.ASCII.GetString(_contentStream.InnerStream.ToStreamObject().RawData);
+
+        Assert.Contains("<0D42>", text);
+    }
+
+    [Fact]
+    public void ShowTextTj_ConsecutiveCidsForming0D0A_PreservesAlignment()
+    {
+        // A literal string would collapse 0D 0A to a single 0x0A, shifting
+        // alignment for every subsequent 2-byte CID from that point on.
+        _contentStream.ShowTextTj(new byte[] { 0x01, 0x0D, 0x0A, 0x02 });
+
+        string text = Encoding.ASCII.GetString(_contentStream.InnerStream.ToStreamObject().RawData);
+
+        Assert.Contains("<010D0A02>", text);
+    }
+
+    [Fact]
+    public void ShowTextTj_BytesMatchingLiteralDelimiters_AreEmittedAsHex()
+    {
+        // A glyph id whose byte encoding contains '(' / ')' / '\' was previously
+        // escaped for a literal string; hex strings need no escaping at all.
+        _contentStream.ShowTextTj(new byte[] { 0x28, 0x29, 0x5C });
+
+        string text = Encoding.ASCII.GetString(_contentStream.InnerStream.ToStreamObject().RawData);
+
+        Assert.Contains("<28295C>", text);
+        Assert.DoesNotContain("\\(", text);
+        Assert.DoesNotContain("\\)", text);
+        Assert.DoesNotContain("\\\\", text);
+    }
+
+    [Fact]
+    public void ShowTextTj_DoesNotEmitLiteralStringDelimiters()
+    {
+        // Regression guard: the operand must no longer be wrapped in ( … ).
+        _contentStream.ShowTextTj(new byte[] { 0x00, 0x0D });
+
+        byte[] raw = _contentStream.InnerStream.ToStreamObject().RawData;
+
+        Assert.DoesNotContain((byte)'(', raw);
+        Assert.DoesNotContain((byte)')', raw);
+    }
+
+    [Fact]
+    public void MoveNextLineShowText_GlyphId0x000D_WritesHexString()
+    {
+        _contentStream.MoveNextLineShowText(new byte[] { 0x00, 0x0D });
+
+        string text = Encoding.ASCII.GetString(_contentStream.InnerStream.ToStreamObject().RawData);
+
+        Assert.Contains("<000D>", text);
+        Assert.Contains("'", text);
+    }
+
+    [Fact]
+    public void MoveNextLineShowText_WithSpacing_WritesHexString()
+    {
+        _contentStream.MoveNextLineShowText(new byte[] { 0x00, 0x0D }, wordSpacing: 1.0, characterSpacing: 2.0);
+
+        string text = Encoding.ASCII.GetString(_contentStream.InnerStream.ToStreamObject().RawData);
+
+        Assert.Contains("<000D>", text);
+        Assert.Contains("\"", text);
+    }
+}
diff --git a/tests/Synercoding.FileFormats.Pdf.Tests/Generation/PdfStreamTests.cs b/tests/Synercoding.FileFormats.Pdf.Tests/Generation/PdfStreamTests.cs
index 533b840..1aa256a 100644
--- a/tests/Synercoding.FileFormats.Pdf.Tests/Generation/PdfStreamTests.cs
+++ b/tests/Synercoding.FileFormats.Pdf.Tests/Generation/PdfStreamTests.cs
@@ -266,6 +266,151 @@ public void Test_ToStreamObject_WithThreeFilters_ReturnsStreamObjectWithFilterAr
Assert.Equal(thirdEncoded, streamObject.RawData);
}
+ // Regression tests for issue #87 — the literal-string escape table and the
+ // hex-string fallback used for CID-encoded show-text operands.
+
+    [Fact]
+    public void WriteStringHex_EmptyArray_WritesEmptyAngleBrackets()
+    {
+        using var memoryStream = new MemoryStream();
+        var pdfStream = new PdfStream(memoryStream);
+
+        pdfStream.WriteStringHex(Array.Empty<byte>()); // no payload bytes at all
+
+        Assert.Equal(new byte[] { 0x3C, 0x3E }, memoryStream.ToArray()); // exactly "<>"
+    }
+
+    [Fact]
+    public void WriteStringHex_GlyphId0x000D_PreservesCarriageReturn()
+    {
+        // Issue #87: glyph id 13 (0x000D) is capital 'J' in Source Sans Pro.
+        // As a literal string the 0x0D byte was normalised to 0x0A by the
+        // parser, so the consumer resolved CID 10 rather than CID 13.
+        // Written as hex digits, the byte cannot be reinterpreted.
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringHex(new byte[] { 0x00, 0x0D });
+
+        Assert.Equal("<000D>", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
+    [Fact]
+    public void WriteStringHex_GlyphIdWithCarriageReturnInHighByte_IsPreserved()
+    {
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringHex(new byte[] { 0x0D, 0x42 });
+
+        Assert.Equal("<0D42>", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
+    [Fact]
+    public void WriteStringHex_ConsecutiveCidsForming0D0A_PreservesAlignment()
+    {
+        // In a literal string the 0D 0A pair would be read back as one 0x0A,
+        // misaligning every 2-byte CID that follows. In a hex string all
+        // four bytes survive.
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringHex(new byte[] { 0x01, 0x0D, 0x0A, 0x02 });
+
+        Assert.Equal("<010D0A02>", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
+    [Fact]
+    public void WriteStringHex_BytesThatWouldBeEscapedInLiteral_AreWrittenRaw()
+    {
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringHex(new byte[] { 0x28, 0x29, 0x5C }); // '(' ')' '\'
+
+        Assert.Equal("<28295C>", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
+    [Fact]
+    public void WriteStringHex_AllBytes_RoundTripExactly()
+    {
+        byte[] allBytes = new byte[256];
+        for (int value = 0; value < allBytes.Length; value++)
+            allBytes[value] = (byte)value;
+
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringHex(allBytes);
+
+        string text = Encoding.ASCII.GetString(sink.ToArray());
+        Assert.StartsWith("<", text);
+        Assert.EndsWith(">", text);
+        Assert.Equal(2 + ( 2 * allBytes.Length ), text.Length);
+
+        // Decode the digits between the delimiters, two per byte, and compare with the input.
+        string digits = text[1..^1];
+        byte[] decoded = new byte[allBytes.Length];
+        for (int index = 0; index < decoded.Length; index++)
+            decoded[index] = Convert.ToByte(digits.Substring(2 * index, 2), 16);
+        Assert.Equal(allBytes, decoded);
+    }
+
+    [Fact]
+    public void WriteStringLiteral_String_EscapesCarriageReturn()
+    {
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringLiteral("a\rb");
+
+        Assert.Equal("(a\\rb)", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
+    [Fact]
+    public void WriteStringLiteral_String_EscapesLineFeed()
+    {
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringLiteral("a\nb");
+
+        Assert.Equal("(a\\nb)", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
+    [Fact]
+    public void WriteStringLiteral_String_EscapesTab()
+    {
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringLiteral("a\tb");
+
+        Assert.Equal("(a\\tb)", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
+    [Fact]
+    public void WriteStringLiteral_String_StillEscapesParenthesesAndBackslash()
+    {
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringLiteral("(a)\\b");
+
+        Assert.Equal("(\\(a\\)\\\\b)", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
+    [Fact]
+    public void WriteStringLiteral_Bytes_EscapesControlCharacters()
+    {
+        using var sink = new MemoryStream();
+        PdfStream writer = new(sink);
+
+        writer.WriteStringLiteral(new byte[] { 0x0D, 0x0A, 0x09, 0x08, 0x0C }); // CR LF TAB BS FF
+
+        Assert.Equal("(\\r\\n\\t\\b\\f)", Encoding.ASCII.GetString(sink.ToArray()));
+    }
+
private class PassThroughFilterStub : IStreamFilter
{
public PdfName Name => PdfName.Get("PassThrough");