diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b9b2f00..0f74e683 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] +### Added + +- Added support for `not in` and `is not` compound operators. + ## [0.7.0] - 2025-11-11 ### Added diff --git a/core/CHANGELOG.md b/core/CHANGELOG.md index 417eabaa..11502875 100644 --- a/core/CHANGELOG.md +++ b/core/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Added support for `not in` and `is not` compound operators. + ## 0.7.0 - 2025-11-11 ### Added diff --git a/core/datatests/generators/optimising_line_formatter.rs b/core/datatests/generators/optimising_line_formatter.rs index d7d0f00d..b0b0510c 100644 --- a/core/datatests/generators/optimising_line_formatter.rs +++ b/core/datatests/generators/optimising_line_formatter.rs @@ -361,6 +361,7 @@ mod comments { child_lines::generate(root_dir); conditional_directives::generate(root_dir); individual_block::generate(root_dir); + compound_operators::generate(root_dir); } mod midline_line { @@ -680,6 +681,44 @@ mod comments { ); } } + + mod compound_operators { + use super::*; + + pub fn generate(root_dir: &Path) { + generate_test_cases!( + root_dir, + not_in = " + AA := AAA {} not {} in {} BBB; + AAA := + AAA {} not {} in {} BBB; + AAA := + AAAAA {} + not {} in {} BBBB; + AAA := + AAAAA {} + not {} in {} BBBBBBBBB; + AAA := + AAAAA + {} + not + {} in + {} BBBBBBBBB; + AAA := + AAAAA + { + } + not + { + } + in + { + } + BBBBBBBBB; + ", + ); + } + } } mod anonymous { @@ -4312,6 +4351,51 @@ mod expressions { and DDDDDDD; ", + compound = " + A := AAAAAAAA not in BBBBBBBB; + A := + AAAAAAAA not in BBBBBBBBB; + A := + AAAAAAAAA + not in BBBBBBBBB; + A := + AAAAAAAAA + not in BBBBBBBBBBBBBBB; + A := + AAAAAA + BBBBBB + CCCCCCCC + not in DDDDDDDDDDDDDD; + A := + AAAAAA + BBBBBB + CCCCCCCC + not in DDDDD + EEEEEE; + A := + AAAAAAA + + BBBBBBB + + CCCCCCC + not in DDDDDDDDDDDDDD; + A := + AAAAAA + BBBBBB + CCCCCCCC + not in DDDDDD + + EEEEEE; + A := AAAAAAAA is not BBBBBBBB; + A := + AAAAAAAA is not BBBBBBBBB; + A := + AAAAAAAAA + is not BBBBBBBBB; + A := + AAAAAAAAA + is not BBBBBBBBBBBBBBB; + A := (AAA not in [DDD + EEE]); + A := + (AAAA not in [DDD + EEE]); + A := + (AAAAA + not in [DDD + EEE]); + A := + (AAAAA + not in [ + DDDDD + EEEE]); + ", ); } } diff --git a/core/src/rules/optimising_line_formatter/contexts.rs b/core/src/rules/optimising_line_formatter/contexts.rs index 7c6939c4..e3ba2a33 100644 --- a/core/src/rules/optimising_line_formatter/contexts.rs +++ b/core/src/rules/optimising_line_formatter/contexts.rs @@ -418,6 +418,15 @@ impl<'a> SpecificContextStack<'a> { ) .cloned() } + fn get_next_real_token_type_from_line_index(&self, line_index: u32) -> Option { + self.formatting_contexts + .line + .get_tokens() + .iter() + .skip(line_index as usize + 1) + .map(|index| self.formatting_contexts.token_types[*index]) + .find(|token_type| !token_type.is_comment_or_compiler_directive()) + } /// Updates all contexts to reflect the decision provided. pub(super) fn update_contexts(&self, node: &mut FormattingNode, decision: RawDecision) { @@ -647,8 +656,20 @@ impl<'a> SpecificContextStack<'a> { _ => {} } } + (Some(op1), Some(op2)) if (op1, op2).get_operator_precedence().is_some() => { + // In the middle of a compound operator, do nothing + } + (_, Some(op @ (TT::Op(_) | TT::Keyword(_)))) + if self + .get_next_real_token_type_from_line_index(line_index) + .is_some_and(|token_type| { + (op, token_type).get_operator_precedence().is_some() + }) => + { + self.update_operator_precedences(node, is_break); + } (prev, Some(op @ (TT::Op(_) | TT::Keyword(_)))) - if super::get_operator_precedence(op).is_some() && is_binary(op, prev) => + if op.get_operator_precedence().is_some() && is_binary(op, prev) => { self.update_operator_precedences(node, is_break); } @@ -679,12 +700,13 @@ impl<'a> SpecificContextStack<'a> { { if let Some(data) = Rc::make_mut(&mut node.context_data).get_mut(ctx_index) { match ctx.context_type { - CT::ConditionalDirective => { - if curr_token_type.is_some_and(|t| !t.is_comment_or_compiler_directive()) { - data.can_break &= data.is_child_broken | is_break; - data.one_element_per_line - .get_or_insert(data.is_child_broken | is_break); - } + CT::ConditionalDirective + if curr_token_type + .is_some_and(|t| !t.is_comment_or_compiler_directive()) => + { + data.can_break &= data.is_child_broken | is_break; + data.one_element_per_line + .get_or_insert(data.is_child_broken | is_break); } CT::TypedAssignment | CT::ForLoop | CT::RaiseAt => { data.is_broken |= data.is_child_broken @@ -700,15 +722,11 @@ impl<'a> SpecificContextStack<'a> { data.one_element_per_line = Some(true); } } - CT::MemberAccess => { - if is_break { - data.one_element_per_line = Some(true); - } + CT::MemberAccess if is_break => { + data.one_element_per_line = Some(true); } - CT::CommaElem | CT::AssignRHS => { - if is_break { - data.break_anonymous_routine = Some(true); - } + CT::CommaElem | CT::AssignRHS if is_break => { + data.break_anonymous_routine = Some(true); } _ => {} } @@ -778,12 +796,6 @@ impl<'a> LineFormattingContexts<'a> { token_types: &'a [TokenType], context_tree: &'a ParentPointerTree, ) -> Self { - let get_token_type_from_line_index = |line_index| { - token_types - .get(*line.get_tokens().get(line_index as usize)?) - .cloned() - }; - let builder_context_tree = Self::new_tree(); let mut contexts = LineFormattingContextsBuilder::new(&builder_context_tree); @@ -825,18 +837,31 @@ impl<'a> LineFormattingContexts<'a> { } } - let mut prev_prev_token_type = None; - let mut prev_token_type = None; - let mut prev_semantic_token_type = None; - let mut current = get_token_type_from_line_index(0); - let mut next_token_type = get_token_type_from_line_index(1); + let mut prev_token_types = Vec::with_capacity(line.get_tokens().len()); + let mut prev_semantic_token_types = Vec::with_capacity(line.get_tokens().len()); + let mut next_token_types = line + .get_tokens() + .iter() + .rev() + .map(|id| token_types[*id]) + .collect::>(); + let mut current = next_token_types.pop(); + + fn next_real_token_type(token_types: &[TokenType]) -> Option { + token_types + .iter() + .rev() + .find(|token_type| !token_type.is_comment_or_compiler_directive()) + .cloned() + } + while let Some(current_token_type) = current { if !current_token_type.is_comment_or_compiler_directive() { let last_context_type = contexts.current_context.get().context_type; // New contexts relating to the previous token are pushed here // to avoid including any leading comments if let (Some(prev_token_type), Some(prev_directive_token_type)) = - (prev_token_type, prev_semantic_token_type) + (prev_token_types.last(), prev_semantic_token_types.last()) { match (prev_token_type, last_context_type) { (TT::Op(OK::LParen | OK::LBrack | OK::LessThan(ChK::Generic)), _) @@ -911,7 +936,10 @@ impl<'a> LineFormattingContexts<'a> { contexts.push_expression(); } TT::Keyword(KK::Abstract) - if matches!(prev_prev_token_type, Some(TT::Keyword(KK::Class))) => {} + if matches!( + prev_token_types.iter().rev().nth(1), + Some(TT::Keyword(KK::Class)) + ) => {} TT::Keyword(kk) if kk.is_directive() => { contexts.push_expression(); } @@ -936,8 +964,24 @@ impl<'a> LineFormattingContexts<'a> { TT::ConditionalDirective(kind) if kind.is_else() => { contexts.push_operators(); } - op if super::get_operator_precedence(op).is_some() - && is_binary(op, prev_prev_token_type) => + op if (*op, current_token_type) + .get_operator_precedence() + .is_some() => + { + // In the middle of a compound operator, do nothing + } + op if prev_token_types + .iter() + .rev() + .nth(1) + .cloned() + .and_then(|prev| (prev, *op).get_operator_precedence()) + .is_some() => + { + contexts.push_operators(); + } + op if op.get_operator_precedence().is_some() + && is_binary(*op, prev_token_types.iter().rev().nth(1).cloned()) => { contexts.push_operators(); } @@ -955,7 +999,7 @@ impl<'a> LineFormattingContexts<'a> { TT::Op(OK::LessThan(ChevronKind::Generic)) => BracketKind::Angle, _ => BracketKind::Round, }; - let (typ, cont_delta) = match prev_token_type { + let (typ, cont_delta) = match prev_token_types.last() { // routine invocations Some(TT::Identifier | TT::Op(OK::GreaterThan(ChevronKind::Generic))) => { (BracketStyle::BreakClose, 1) @@ -1095,31 +1139,52 @@ impl<'a> LineFormattingContexts<'a> { contexts.last_context_mut().context_type = CT::RaiseAt; contexts.push(CT::Subject); } - TT::Op(OK::Dot) => { - if CT::Precedence(0) == last_context_type { - contexts.retain_current(); - if matches!(prev_token_type, Some(TT::Op(OK::RParen | OK::RBrack))) { - /* - Fluency is considered after () and [] because - they allow for arbitrary computation which will - harm the readability of a chained expression on - a single line. - - <> and just pure names are more likely to be - simple. Generics are seen as an extension of the - identifier. - */ - contexts.fluent(contexts.current_context.clone()); - } + TT::Op(OK::Dot) if CT::Precedence(0) == last_context_type => { + contexts.retain_current(); + if matches!( + prev_token_types.last(), + Some(TT::Op(OK::RParen | OK::RBrack)) + ) { + /* + Fluency is considered after () and [] because + they allow for arbitrary computation which will + harm the readability of a chained expression on + a single line. + + <> and just pure names are more likely to be + simple. Generics are seen as an extension of the + identifier. + */ + contexts.fluent(contexts.current_context.clone()); } } - op if super::get_operator_precedence(op).is_some() - && is_binary(op, prev_token_type) => + + op if prev_token_types + .last() + .cloned() + .and_then(|prev| (prev, op).get_operator_precedence()) + .is_some() => + { + // We are in the middle of a compound operator, do nothing + } + op if next_real_token_type(&next_token_types) + .and_then(|next| (op, next).get_operator_precedence()) + .is_some() => + { + let op_prec = next_real_token_type(&next_token_types) + .and_then(|next| (op, next).get_operator_precedence()) + .unwrap(); + contexts.pop_until_and_retain(CT::Precedence(op_prec)); + } + op if op.get_operator_precedence().is_some() + && is_binary(op, prev_token_types.last().cloned()) => { - let op_prec = super::get_operator_precedence(op).unwrap(); + let op_prec = op.get_operator_precedence().unwrap(); contexts.pop_until_and_retain(CT::Precedence(op_prec)); } - TT::Keyword(KK::Of) if matches!(next_token_type, Some(TT::Keyword(KK::Object))) => { + TT::Keyword(KK::Of) + if matches!(next_token_types.last(), Some(TT::Keyword(KK::Object))) => + { contexts.pop_until_after(CT::AnonHeader); } TT::Keyword(KK::Then | KK::Do | KK::Of) => { @@ -1140,7 +1205,7 @@ impl<'a> LineFormattingContexts<'a> { contexts.pop_until_after(CT::AnonHeader); } TT::Keyword(KK::Abstract) - if matches!(prev_token_type, Some(TT::Keyword(KK::Class))) => {} + if matches!(prev_token_types.last(), Some(TT::Keyword(KK::Class))) => {} TT::Keyword(kk) if kk.is_directive() => { if contexts.pop_until(CT::DirectiveList) != Some(CT::DirectiveList) { if contexts @@ -1186,14 +1251,12 @@ impl<'a> LineFormattingContexts<'a> { } if !current_token_type.is_comment_or_directive() { - prev_prev_token_type = prev_token_type; - prev_token_type = current; + prev_token_types.extend(current); } if !current_token_type.is_comment_or_compiler_directive() { - prev_semantic_token_type = current; + prev_semantic_token_types.extend(current); } - current = next_token_type; - next_token_type = get_token_type_from_line_index(contexts.line_index + 1); + current = next_token_types.pop(); } contexts.finalise(); @@ -2165,6 +2228,17 @@ mod tests { 1 Precedence(3) ^----------- 1 Precedence(2) ^-----$ "}, + not_in_operator = {" + AA + BB not in CC + 1 Base ^---------------- + 1 Precedence(4) ^---------------- + 1 Precedence(3) ^-----$ + "}, + is_not_operator = {" + AA is not BB + 1 Base ^----------- + 1 Precedence(4) ^----------- + "}, routine_arguments = {" AA(BB, CC) + DD 1 Base ^-------------- diff --git a/core/src/rules/optimising_line_formatter/mod.rs b/core/src/rules/optimising_line_formatter/mod.rs index 6bd926e8..3f70c735 100644 --- a/core/src/rules/optimising_line_formatter/mod.rs +++ b/core/src/rules/optimising_line_formatter/mod.rs @@ -1258,41 +1258,57 @@ impl<'this> InternalOptimisingLineFormatter<'this, '_> { const HIGHEST_PRECEDENCE: u8 = 0; const LOWEST_PRECEDENCE: u8 = 5; -fn get_operator_precedence(token_type: TokenType) -> Option { - match token_type { - TT::Op(OK::Dot) => Some(0), +trait OperatorPrecedence { + fn get_operator_precedence(self) -> Option; +} - TT::Op(OK::AddressOf) | TT::Keyword(KK::Not) => Some(1), +impl OperatorPrecedence for TokenType { + fn get_operator_precedence(self) -> Option { + match self { + TT::Op(OK::Dot) => Some(0), - TT::Op(OK::Star | OK::Slash) - | TT::Keyword(KK::Div | KK::Mod | KK::And | KK::Shl | KK::Shr | KK::As) => Some(2), + TT::Op(OK::AddressOf) | TT::Keyword(KK::Not) => Some(1), - TT::Op(OK::Plus | OK::Minus) | TT::Keyword(KK::Or | KK::Xor) => Some(3), + TT::Op(OK::Star | OK::Slash) + | TT::Keyword(KK::Div | KK::Mod | KK::And | KK::Shl | KK::Shr | KK::As) => Some(2), - TT::Op( - OK::Equal(EqKind::Comp) - | OK::NotEqual - | OK::LessThan(ChK::Comp) - | OK::GreaterThan(ChK::Comp) - | OK::LessEqual - | OK::GreaterEqual, - ) - | TT::Keyword(KK::In(InKind::Op) | KK::Is) => Some(4), - // The import clause `in`s is most simply represented as a precedence - // relationship - TT::Keyword(KK::In(InKind::Import)) => Some(4), - TT::Op(OK::DotDot) => Some(5), - - TT::Op(_) - | TT::Identifier - | TT::Keyword(_) - | TT::TextLiteral(_) - | TT::NumberLiteral(_) - | TT::ConditionalDirective(_) - | TT::CompilerDirective - | TT::Comment(_) - | TT::Eof - | TT::Unknown => None, + TT::Op(OK::Plus | OK::Minus) | TT::Keyword(KK::Or | KK::Xor) => Some(3), + + TT::Op( + OK::Equal(EqKind::Comp) + | OK::NotEqual + | OK::LessThan(ChK::Comp) + | OK::GreaterThan(ChK::Comp) + | OK::LessEqual + | OK::GreaterEqual, + ) + | TT::Keyword(KK::In(InKind::Op) | KK::Is) => Some(4), + // The import clause `in`s is most simply represented as a precedence + // relationship + TT::Keyword(KK::In(InKind::Import)) => Some(4), + TT::Op(OK::DotDot) => Some(5), + + TT::Op(_) + | TT::Identifier + | TT::Keyword(_) + | TT::TextLiteral(_) + | TT::NumberLiteral(_) + | TT::ConditionalDirective(_) + | TT::CompilerDirective + | TT::Comment(_) + | TT::Eof + | TT::Unknown => None, + } + } +} +impl OperatorPrecedence for (TokenType, TokenType) { + fn get_operator_precedence(self) -> Option { + match self { + // Compound operators `not in` and `is not` + (TT::Keyword(KK::Not), op @ TT::Keyword(KK::In(InKind::Op))) + | (op @ TT::Keyword(KK::Is), TT::Keyword(KK::Not)) => op.get_operator_precedence(), + _ => None, + } } } diff --git a/core/src/rules/optimising_line_formatter/requirements.rs b/core/src/rules/optimising_line_formatter/requirements.rs index 8d352021..97c07604 100644 --- a/core/src/rules/optimising_line_formatter/requirements.rs +++ b/core/src/rules/optimising_line_formatter/requirements.rs @@ -1,10 +1,10 @@ use super::InternalOptimisingLineFormatter; use super::SpecificContextDataStack; use super::contexts::*; -use super::get_operator_precedence; use super::is_binary; use super::types::DecisionRequirement; use crate::lang::*; +use crate::rules::optimising_line_formatter::OperatorPrecedence; use super::contexts::ContextType as CT; use super::types::DecisionRequirement as DR; @@ -213,8 +213,11 @@ impl InternalOptimisingLineFormatter<'_, '_> { .get_last_context(CT::RaiseAt) .map(|(_, data)| data.is_broken | data.is_child_broken) .if_else_or_default(DR::MustBreak, DR::Indifferent), + (Some(op1), Some(op2)) if (op1, op2).get_operator_precedence().is_some() => { + DR::MustNotBreak + } (prev, Some(op @ (TT::Op(_) | TT::Keyword(_)))) - if get_operator_precedence(op).is_some() && is_binary(op, prev) => + if op.get_operator_precedence().is_some() && is_binary(op, prev) => { contexts_data .iter() @@ -239,7 +242,7 @@ impl InternalOptimisingLineFormatter<'_, '_> { .map(|(_, data)| data.is_broken | data.is_child_broken) .if_else_or_default(DR::MustBreak, DR::Indifferent), (Some(op @ (TT::Op(_) | TT::Keyword(_))), _) - if get_operator_precedence(op).is_some() => + if op.get_operator_precedence().is_some() => { DR::MustNotBreak }