Skip to content

Commit 4ee7989

Browse files
committed
Speed decoded charset text properties
1 parent 0550597 commit 4ee7989

8 files changed

Lines changed: 289 additions & 78 deletions

File tree

neovm-core/src/buffer/text_props.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,16 @@ impl TextPropertyTable {
263263
changed
264264
}
265265

266+
/// Builds a `TextPropertyTable` directly from a list of property runs.
///
/// Each `(start, end, plist)` tuple is wrapped in an `IntervalRun` and the
/// whole set is handed to `rebuild_from_runs` on a freshly constructed table,
/// so the tree is built in one pass.
///
/// NOTE(review): whether `start`/`end` are byte or character offsets, and
/// whether `end` is exclusive, is not visible from this function — confirm
/// against `IntervalRun::new`.
pub(crate) fn from_plist_runs(runs: Vec<(usize, usize, Vec<(Value, Value)>)>) -> Self {
267+
let mut table = Self::new();
268+
// Convert the plain tuples into `IntervalRun`s; `rebuild_from_runs` passes
// them through `normalize_runs` before the tree is built.
table.rebuild_from_runs(
269+
runs.into_iter()
270+
.map(|(start, end, plist)| IntervalRun::new(start, end, plist))
271+
.collect(),
272+
);
273+
table
274+
}
275+
266276
pub fn get_property(&self, pos: usize, name: Value) -> Option<&Value> {
267277
let idx = self.interval_containing_index(pos)?;
268278
plist_get(&self.nodes[idx].plist, name)
@@ -517,6 +527,7 @@ impl TextPropertyTable {
517527
/// Discards the current interval nodes and rebuilds the tree from `runs`.
///
/// The runs are first passed through `normalize_runs`; the node storage is
/// then cleared and `self.root` is set to whatever `build_subtree` returns
/// for the normalized runs, with `IntervalParent::Object` as the parent.
fn rebuild_from_runs(&mut self, runs: Vec<IntervalRun>) {
518528
let runs = normalize_runs(runs);
519529
self.nodes.clear();
530+
// Reserve space for `runs.len()` nodes up front.
// NOTE(review): presumably `build_subtree` pushes one node per normalized
// run, making this an exact pre-allocation — confirm.
self.nodes.reserve(runs.len());
520531
self.root = self.build_subtree(&runs, IntervalParent::Object);
521532
}
522533

neovm-core/src/emacs_core/coding.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,7 @@ impl CodingSystemManager {
659659
) -> Option<String> {
660660
let normalized = normalize_coding_name_for_lookup(name);
661661
if EolType::from_suffix(normalized).is_some() {
662-
return canonical_runtime_name(self, normalized);
662+
return resolve_runtime_name(self, normalized);
663663
}
664664

665665
let eol = match eol_suffix {

neovm-core/src/emacs_core/coding_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ fn canonical_name_for_detected_eol_matches_gnu_alias_resolution() {
170170
assert_eq!(
171171
m.canonical_name_for_detected_eol("cn-gb-2312-unix", "-dos")
172172
.as_deref(),
173-
Some("chinese-iso-8bit-unix")
173+
Some("cn-gb-2312-unix")
174174
);
175175
}
176176

neovm-core/src/emacs_core/fileio.rs

Lines changed: 83 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ use std::sync::Once;
1515
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
1616
use std::time::{SystemTime, UNIX_EPOCH};
1717

18+
use crate::buffer::text_props::TextPropertyTable;
19+
use crate::heap_types::LispString;
20+
1821
use super::error::{EvalResult, Flow, signal};
1922
use super::eval::Context;
2023
use super::intern::{intern, resolve_sym};
@@ -3496,7 +3499,8 @@ fn current_buffer_id_or_error(
34963499
fn replace_accessible_portion_in_current_buffer(
34973500
buffers: &mut crate::buffer::BufferManager,
34983501
current_id: crate::buffer::BufferId,
3499-
text: &str,
3502+
text: &LispString,
3503+
text_props: Option<&TextPropertyTable>,
35003504
) -> Result<(), Flow> {
35013505
let (start, end, old_point) = {
35023506
let buf = buffers
@@ -3514,10 +3518,15 @@ fn replace_accessible_portion_in_current_buffer(
35143518
.ok_or_else(|| signal("error", vec![Value::string("No current buffer")]))?;
35153519
if !text.is_empty() {
35163520
buffers
3517-
.insert_into_buffer(current_id, text)
3521+
.insert_lisp_string_into_buffer(current_id, text)
35183522
.ok_or_else(|| signal("error", vec![Value::string("No current buffer")]))?;
3523+
if let Some(table) = text_props {
3524+
buffers
3525+
.append_buffer_text_properties(current_id, table, start)
3526+
.ok_or_else(|| signal("error", vec![Value::string("No current buffer")]))?;
3527+
}
35193528
}
3520-
let replacement_end = start + text.len();
3529+
let replacement_end = start + text.sbytes();
35213530
let restored_point = if old_point <= start {
35223531
old_point
35233532
} else {
@@ -3532,11 +3541,12 @@ fn replace_accessible_portion_in_current_buffer(
35323541
fn insert_file_contents_into_current_buffer_in_state(
35333542
buffers: &mut crate::buffer::BufferManager,
35343543
current_id: crate::buffer::BufferId,
3535-
contents: &str,
3544+
contents: &LispString,
3545+
text_props: Option<&TextPropertyTable>,
35363546
replace_requested: bool,
35373547
) -> Result<(), Flow> {
35383548
if replace_requested {
3539-
replace_accessible_portion_in_current_buffer(buffers, current_id, contents)
3549+
replace_accessible_portion_in_current_buffer(buffers, current_id, contents, text_props)
35403550
} else {
35413551
// GNU Emacs: insert-file-contents inserts text at point but does NOT
35423552
// advance point past the inserted text (unlike regular `insert`).
@@ -3547,8 +3557,13 @@ fn insert_file_contents_into_current_buffer_in_state(
35473557
.map(|b| (b.pt_byte, b.pt))
35483558
.unwrap_or((0, 0));
35493559
buffers
3550-
.insert_into_buffer(current_id, contents)
3560+
.insert_lisp_string_into_buffer(current_id, contents)
35513561
.ok_or_else(|| signal("error", vec![Value::string("No current buffer")]))?;
3562+
if let Some(table) = text_props {
3563+
buffers
3564+
.append_buffer_text_properties(current_id, table, pt_before.0)
3565+
.ok_or_else(|| signal("error", vec![Value::string("No current buffer")]))?;
3566+
}
35523567
// Restore point to before the insertion (matching GNU).
35533568
if let Some(buf) = buffers.get_mut(current_id) {
35543569
buf.pt_byte = pt_before.0;
@@ -3719,13 +3734,46 @@ fn write_region_content_in_state(
37193734
Ok(buf.buffer_substring_lisp_string(byte_start, byte_end))
37203735
}
37213736

3737+
/// Result of `decode_insert_file_contents`: the decoded text together with
/// the name of the coding system that was used to produce it.
struct DecodedFileContents {
3738+
// Decoded text as a Lisp value. `text()` expects this to hold a Lisp string
// and panics otherwise.
value: Value,
3739+
// Coding system name; the caller stores it in `last-coding-system-used`.
coding: String,
3740+
}
3741+
3742+
impl DecodedFileContents {
3743+
/// Wraps an already-built `LispString` as a heap string value and pairs it
/// with the `coding` name.
fn from_lisp_string(text: LispString, coding: String) -> Self {
3744+
Self {
3745+
value: Value::heap_string(text),
3746+
coding,
3747+
}
3748+
}
3749+
3750+
/// Wraps a decoded Rust `String` by building a `LispString` from it and
/// delegating to `from_lisp_string`.
///
/// NOTE(review): the `true` passed to `LispString::new` presumably marks the
/// string as multibyte — confirm against `LispString::new`.
fn from_multibyte_string(text: String, coding: String) -> Self {
3751+
Self::from_lisp_string(LispString::new(text, true), coding)
3752+
}
3753+
3754+
/// Borrows the decoded text as a `LispString`.
///
/// # Panics
///
/// Panics if `value` is not a Lisp string (`as_lisp_string()` returned
/// `None`).
fn text(&self) -> &LispString {
3755+
self.value
3756+
.as_lisp_string()
3757+
.expect("decoded file contents must be a Lisp string")
3758+
}
3759+
3760+
/// Returns the interval table attached to the decoded string, or `None`
/// when that table is empty.
fn text_properties(&self) -> Option<&TextPropertyTable> {
3761+
let table = self.text().intervals();
3762+
if table.is_empty() { None } else { Some(table) }
3763+
}
3764+
3765+
/// Returns `schars()` of the decoded string, cast to `i64`.
///
/// NOTE(review): `schars()` is presumably the character (not byte) count,
/// as the method name implies — confirm against `LispString`.
fn char_count(&self) -> i64 {
3766+
self.text().schars() as i64
3767+
}
3768+
}
3769+
37223770
fn decode_insert_file_contents(
37233771
coding_systems: &crate::emacs_core::coding::CodingSystemManager,
37243772
bytes: &[u8],
37253773
multibyte: bool,
37263774
source_load_context: bool,
37273775
coding_system_for_read: Option<&str>,
3728-
) -> Result<(String, String), Flow> {
3776+
) -> Result<DecodedFileContents, Flow> {
37293777
let detected_default_eol_suffix = |bytes: &[u8]| {
37303778
let mut saw_lf = false;
37313779
let mut saw_crlf = false;
@@ -3772,33 +3820,40 @@ fn decode_insert_file_contents(
37723820
else {
37733821
if source_load_context && multibyte {
37743822
let eol_suffix = detected_default_eol_suffix(bytes);
3775-
return Ok((
3776-
crate::encoding::decode_bytes(bytes, &format!("utf-8-emacs{eol_suffix}")),
3777-
format!("utf-8-emacs{eol_suffix}"),
3823+
let coding = format!("utf-8-emacs{eol_suffix}");
3824+
return Ok(DecodedFileContents::from_multibyte_string(
3825+
crate::encoding::decode_bytes(bytes, &coding),
3826+
coding,
37783827
));
37793828
}
37803829

37813830
if !multibyte {
3782-
return Ok((
3783-
crate::emacs_core::string_escape::bytes_to_unibyte_storage_string(bytes),
3831+
return Ok(DecodedFileContents::from_lisp_string(
3832+
LispString::from_unibyte(bytes.to_vec()),
37843833
"no-conversion".to_string(),
37853834
));
37863835
}
37873836

37883837
let eol_suffix = detected_default_eol_suffix(bytes);
37893838
if bytes.is_ascii() {
37903839
let coding = format!("undecided{eol_suffix}");
3791-
return Ok((crate::encoding::decode_bytes(bytes, &coding), coding));
3840+
return Ok(DecodedFileContents::from_multibyte_string(
3841+
crate::encoding::decode_bytes(bytes, &coding),
3842+
coding,
3843+
));
37923844
}
37933845
if std::str::from_utf8(bytes).is_ok() {
37943846
let coding = format!("utf-8{eol_suffix}");
3795-
return Ok((crate::encoding::decode_bytes(bytes, &coding), coding));
3847+
return Ok(DecodedFileContents::from_multibyte_string(
3848+
crate::encoding::decode_bytes(bytes, &coding),
3849+
coding,
3850+
));
37963851
}
37973852

37983853
let eol_suffix = detected_default_eol_suffix(bytes);
37993854
let coding = format!("utf-8-emacs{eol_suffix}");
38003855
let decoded = crate::encoding::decode_bytes(bytes, &coding);
3801-
return Ok((decoded, coding));
3856+
return Ok(DecodedFileContents::from_multibyte_string(decoded, coding));
38023857
};
38033858

38043859
let eol_suffix = detected_default_eol_suffix(bytes);
@@ -3807,7 +3862,10 @@ fn decode_insert_file_contents(
38073862
.unwrap_or_else(|| coding.to_string());
38083863

38093864
if source_load_context && multibyte && is_utf8_like_source_coding(&coding) {
3810-
return Ok((crate::emacs_core::load::decode_emacs_utf8(bytes), coding));
3865+
return Ok(DecodedFileContents::from_multibyte_string(
3866+
crate::emacs_core::load::decode_emacs_utf8(bytes),
3867+
coding,
3868+
));
38113869
}
38123870

38133871
let decoded = crate::encoding::builtin_decode_coding_string_with_known(
@@ -3819,7 +3877,10 @@ fn decode_insert_file_contents(
38193877
)?;
38203878

38213879
match decoded.kind() {
3822-
ValueKind::String => Ok((fileio_owned_runtime_string(decoded), coding)),
3880+
ValueKind::String => Ok(DecodedFileContents {
3881+
value: decoded,
3882+
coding,
3883+
}),
38233884
other => Err(signal(
38243885
"error",
38253886
vec![Value::string(format!(
@@ -4052,7 +4113,7 @@ pub(crate) fn builtin_insert_file_contents(
40524113
} else {
40534114
None
40544115
};
4055-
let (contents, used_coding) = decode_insert_file_contents(
4116+
let contents = decode_insert_file_contents(
40564117
&eval.coding_systems,
40574118
slice,
40584119
multibyte,
@@ -4061,18 +4122,19 @@ pub(crate) fn builtin_insert_file_contents(
40614122
.as_deref()
40624123
.or(auto_coding_system.as_deref()),
40634124
)?;
4064-
let decoded_char_count = contents.chars().count() as i64;
4125+
let decoded_char_count = contents.char_count();
40654126

40664127
insert_file_contents_into_current_buffer_in_state(
40674128
&mut eval.buffers,
40684129
current_id,
4069-
&contents,
4130+
contents.text(),
4131+
contents.text_properties(),
40704132
replace_requested,
40714133
)?;
40724134

40734135
// GNU `insert-file-contents' sets `last-coding-system-used' before
40744136
// `after-insert-file-set-coding' derives `buffer-file-coding-system'.
4075-
eval.set_variable("last-coding-system-used", Value::symbol(&used_coding));
4137+
eval.set_variable("last-coding-system-used", Value::symbol(&contents.coding));
40764138

40774139
let inserted_char_count = run_after_insert_file_pipeline(
40784140
eval,

0 commit comments

Comments
 (0)