Skip to content

Commit 90cb9fa

Browse files
readline: add unicode line separator option
Signed-off-by: Scarab Systems <scarab.systems@yahoo.com>
1 parent 822ef3a commit 90cb9fa

4 files changed

Lines changed: 63 additions & 10 deletions

File tree

doc/api/readline.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,10 @@ added: v0.1.98
120120
-->
121121

122122
The `'line'` event is emitted whenever the `input` stream receives an
123-
end-of-line input (`\n`, `\r`, or `\r\n`). This usually occurs when the user
124-
presses <kbd>Enter</kbd> or <kbd>Return</kbd>.
123+
end-of-line input (`\n`, `\r`, or `\r\n`). By default, Unicode line separator
124+
(`\u2028`) and paragraph separator (`\u2029`) characters are also treated as
125+
end-of-line input. This usually occurs when the user presses <kbd>Enter</kbd>
126+
or <kbd>Return</kbd>.
125127

126128
The `'line'` event is also emitted if new data has been read from a stream and
127129
that stream ends without a final end-of-line marker.
@@ -716,6 +718,8 @@ added: v17.0.0
716718
`100`. It can be set to `Infinity`, in which case `\r` followed by `\n`
717719
will always be considered a single newline (which may be reasonable for
718720
[reading files][] with `\r\n` line delimiter). **Default:** `100`.
721+
* `unicodeLineSeparators` {boolean} If `true`, `\u2028` and `\u2029` will be
722+
treated as end-of-line input. **Default:** `true`.
719723
* `escapeCodeTimeout` {number} The duration `readlinePromises` will wait for a
720724
character, in milliseconds, when reading an ambiguous key sequence (one that
721725
can both form a complete key sequence using the input read so far and can
@@ -981,6 +985,8 @@ changes:
981985
`100`. It can be set to `Infinity`, in which case `\r` followed by `\n`
982986
will always be considered a single newline (which may be reasonable for
983987
[reading files][] with `\r\n` line delimiter). **Default:** `100`.
988+
* `unicodeLineSeparators` {boolean} If `true`, `\u2028` and `\u2029` will be
989+
treated as end-of-line input. **Default:** `true`.
984990
* `escapeCodeTimeout` {number} The duration `readline` will wait for a
985991
character, in milliseconds, when reading an ambiguous key sequence (one that
986992
can both form a complete key sequence using the input read so far and can

lib/internal/readline/interface.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ const {
4444

4545
const {
4646
validateAbortSignal,
47+
validateBoolean,
4748
validateString,
4849
validateUint32,
4950
} = require('internal/validators');
@@ -86,6 +87,7 @@ const kMincrlfDelay = 100;
8687
* - \u2029 (Unicode 'PARAGRAPH SEPARATOR')
8788
*/
8889
const lineEnding = /\r?\n|\r(?!\n)|\u2028|\u2029/g;
90+
const crlfLineEnding = /\r?\n|\r(?!\n)/g;
8991

9092
const kLineObjectStream = Symbol('line object stream');
9193
const kQuestionCancel = Symbol('kQuestionCancel');
@@ -116,6 +118,7 @@ const kMoveUpOrHistoryPrev = Symbol('_moveUpOrHistoryPrev');
116118
const kInsertString = Symbol('_insertString');
117119
const kLine = Symbol('_line');
118120
const kLine_buffer = Symbol('_line_buffer');
121+
const kLineEnding = Symbol('_lineEnding');
119122
const kKillRing = Symbol('_killRing');
120123
const kKillRingCursor = Symbol('_killRingCursor');
121124
const kMoveCursor = Symbol('_moveCursor');
@@ -172,6 +175,7 @@ function InterfaceConstructor(input, output, completer, terminal) {
172175
let crlfDelay;
173176
let prompt = '> ';
174177
let signal;
178+
let unicodeLineSeparators = true;
175179

176180
if (input?.input) {
177181
// An options object was given
@@ -208,6 +212,13 @@ function InterfaceConstructor(input, output, completer, terminal) {
208212
}
209213

210214
crlfDelay = input.crlfDelay;
215+
if (input.unicodeLineSeparators !== undefined) {
216+
validateBoolean(
217+
input.unicodeLineSeparators,
218+
'options.unicodeLineSeparators',
219+
);
220+
unicodeLineSeparators = input.unicodeLineSeparators;
221+
}
211222
input = input.input;
212223

213224
input.size = historySize;
@@ -250,6 +261,7 @@ function InterfaceConstructor(input, output, completer, terminal) {
250261
MathMax(kMincrlfDelay, crlfDelay) :
251262
kMincrlfDelay;
252263
this.completer = completer;
264+
this[kLineEnding] = unicodeLineSeparators ? lineEnding : crlfLineEnding;
253265

254266
this.setPrompt(prompt);
255267

@@ -623,6 +635,7 @@ class Interface extends InterfaceConstructor {
623635
}
624636

625637
// Run exec() on the new string chunk, not on the entire line buffer.
638+
const lineEnding = this[kLineEnding];
626639
let newPartContainsEnding = RegExpPrototypeExec(lineEnding, string);
627640
if (newPartContainsEnding !== null) {
628641
if (this[kLine_buffer]) {
@@ -1530,6 +1543,7 @@ class Interface extends InterfaceConstructor {
15301543
default:
15311544
if (typeof s === 'string' && s) {
15321545
// Erase state of previous searches.
1546+
const lineEnding = this[kLineEnding];
15331547
lineEnding.lastIndex = 0;
15341548
let nextMatch;
15351549
// Keep track of the end of the last match.

lib/readline.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ Interface.prototype.question[promisify.custom] = function question(query, option
199199
* removeHistoryDuplicates?: boolean;
200200
* prompt?: string;
201201
* crlfDelay?: number;
202+
* unicodeLineSeparators?: boolean;
202203
* escapeCodeTimeout?: number;
203204
* tabSize?: number;
204205
* signal?: AbortSignal;

test/parallel/test-readline-line-separators.js

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,45 @@ const { Readable } = require('node:stream');
66

77
const str = '012\n345\r67\r\n89\u{2028}ABC\u{2029}DEF';
88

9-
const rli = new readline.Interface({
10-
input: Readable.from(str),
11-
});
9+
// Unicode line and paragraph separators are line endings by default.
10+
{
11+
const rli = new readline.Interface({
12+
input: Readable.from(str),
13+
});
1214

13-
const linesRead = [];
14-
rli.on('line', (line) => linesRead.push(line));
15+
const linesRead = [];
16+
rli.on('line', (line) => linesRead.push(line));
1517

16-
rli.on('close', common.mustCall(() => {
17-
assert.deepStrictEqual(linesRead, ['012', '345', '67', '89', 'ABC', 'DEF']);
18-
}));
18+
rli.on('close', common.mustCall(() => {
19+
assert.deepStrictEqual(linesRead, ['012', '345', '67', '89', 'ABC', 'DEF']);
20+
}));
21+
}
22+
23+
// The option allows file formats such as JSONL to keep Unicode separators
24+
// inside record contents while still splitting on CR, LF, and CRLF.
25+
{
26+
const rli = new readline.Interface({
27+
input: Readable.from(str),
28+
unicodeLineSeparators: false,
29+
});
30+
31+
const linesRead = [];
32+
rli.on('line', (line) => linesRead.push(line));
33+
34+
rli.on('close', common.mustCall(() => {
35+
assert.deepStrictEqual(
36+
linesRead,
37+
['012', '345', '67', '89\u2028ABC\u2029DEF'],
38+
);
39+
}));
40+
}
41+
42+
assert.throws(
43+
() => new readline.Interface({
44+
input: Readable.from(str),
45+
unicodeLineSeparators: 'false',
46+
}),
47+
{
48+
code: 'ERR_INVALID_ARG_TYPE',
49+
},
50+
);

0 commit comments

Comments
 (0)