From fb32c84059fc0d4aaf06ed8ffcbd1dc84c91198c Mon Sep 17 00:00:00 2001 From: mwang87 Date: Mon, 16 Mar 2026 15:55:21 -0700 Subject: [PATCH] updating formula distribution --- massql/msql.ebnf | 2 +- massql/msql_parser.py | 3 +++ ...MA___61a2da302d53487ba89667bb7b7433a3.json | 2 +- ...RO___8ee2287663d6ad552d20963061530a3b.json | 2 +- ...RO___4fe070795d41b0ea705398a6e5d940ab.json | 2 +- ...RO___09ccf26b267546b40a6acd472bbea037.json | 2 +- ...ax___ce48d94145b8fe9dd12d4257ffda9ad1.json | 2 +- tests/test_parse.py | 19 +++++++++++++++++++ 8 files changed, 28 insertions(+), 6 deletions(-) diff --git a/massql/msql.ebnf b/massql/msql.ebnf index 1f6f03f0..5543e00e 100644 --- a/massql/msql.ebnf +++ b/massql/msql.ebnf @@ -146,7 +146,7 @@ factor: floating | "formula(" moleculeformula ")" | "aminoaciddelta(" aminoacids ")" | peptidefunction - | "(" numericalexpression ")" + | "(" numericalexpression ")" -> factor_parens peptidefunction: "peptide(" peptide "," "charge=" peptidecharge "," "ion=" peptideion ")" multiply: "*" divide: "/" diff --git a/massql/msql_parser.py b/massql/msql_parser.py index 12838ffe..32a2e9f4 100644 --- a/massql/msql_parser.py +++ b/massql/msql_parser.py @@ -367,6 +367,9 @@ def divide(self, s): def factor(self, s): return s[0] + def factor_parens(self, items): + return f"({items[0]})" + def term(self, items): if len(items) == 1: return items[0] diff --git a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS1MZ_X-2INTENSITYMA___61a2da302d53487ba89667bb7b7433a3.json b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS1MZ_X-2INTENSITYMA___61a2da302d53487ba89667bb7b7433a3.json index d59e89eb..1523d728 100644 --- a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS1MZ_X-2INTENSITYMA___61a2da302d53487ba89667bb7b7433a3.json +++ b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS1MZ_X-2INTENSITYMA___61a2da302d53487ba89667bb7b7433a3.json @@ -6,7 +6,7 @@ "qualifierintensitymatch": { "comparator": "equal", "name": "qualifierintensitymatch", - 
"value": "Y*0.0608+2e-06*X" + "value": "Y*(0.0608+(2e-06*X))" }, "qualifierintensitytolpercent": { "comparator": "equal", diff --git a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_AND_MS2PRO___8ee2287663d6ad552d20963061530a3b.json b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_AND_MS2PRO___8ee2287663d6ad552d20963061530a3b.json index 4196e036..92a3f5ce 100644 --- a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_AND_MS2PRO___8ee2287663d6ad552d20963061530a3b.json +++ b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_AND_MS2PRO___8ee2287663d6ad552d20963061530a3b.json @@ -11,7 +11,7 @@ "conditiontype": "where", "type": "ms2productcondition", "value": [ - "2.0*X-55.9349375" + "2.0*(X-55.9349375)" ] } ], diff --git a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_And_MS2PRO___4fe070795d41b0ea705398a6e5d940ab.json b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_And_MS2PRO___4fe070795d41b0ea705398a6e5d940ab.json index d52f5394..59fbbb79 100644 --- a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_And_MS2PRO___4fe070795d41b0ea705398a6e5d940ab.json +++ b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_And_MS2PRO___4fe070795d41b0ea705398a6e5d940ab.json @@ -11,7 +11,7 @@ "conditiontype": "where", "type": "ms2productcondition", "value": [ - "2.0*X-55.9349375" + "2.0*(X-55.9349375)" ] } ], diff --git a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_and_MS2PRO___09ccf26b267546b40a6acd472bbea037.json b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_and_MS2PRO___09ccf26b267546b40a6acd472bbea037.json index 9a9de34b..8c6673fb 100644 --- a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_and_MS2PRO___09ccf26b267546b40a6acd472bbea037.json +++ b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_MS2PROD_X_and_MS2PRO___09ccf26b267546b40a6acd472bbea037.json @@ -11,7 +11,7 @@ "conditiontype": "where", "type": 
"ms2productcondition", "value": [ - "2.0*X-55.9349375" + "2.0*(X-55.9349375)" ] } ], diff --git a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_X_range_min_100,_max___ce48d94145b8fe9dd12d4257ffda9ad1.json b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_X_range_min_100,_max___ce48d94145b8fe9dd12d4257ffda9ad1.json index de58f586..07fa3c1e 100644 --- a/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_X_range_min_100,_max___ce48d94145b8fe9dd12d4257ffda9ad1.json +++ b/tests/reference_parses/QUERY_scaninfo_MS2DATA__WHERE_X_range_min_100,_max___ce48d94145b8fe9dd12d4257ffda9ad1.json @@ -12,7 +12,7 @@ "qualifierintensitymatch": { "comparator": "equal", "name": "qualifierintensitymatch", - "value": "Y*0.0608+2e-06*X" + "value": "Y*(0.0608+(2e-06*X))" }, "qualifierintensitytolpercent": { "comparator": "equal", diff --git a/tests/test_parse.py b/tests/test_parse.py index 32b9b6d5..84e9d133 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -112,6 +112,25 @@ def test_variable_formula_parse2(): parsed_output = msql_parser.parse_msql(query) print(json.dumps(parsed_output, indent=4)) +def test_formula_distribution(): + """Verify that formula() is correctly distributed through multiplication in X-expressions. + + Previously, 2*(X - formula(Fe)) was parsed as 2*X - 55.93 instead of 2*(X - 55.93), + which is a ~56 Da error when evaluated. 
+ """ + query = "QUERY scaninfo(MS2DATA) WHERE MS2PROD=X AND MS2PROD=2.0*(X - formula(Fe))" + parsed_output = msql_parser.parse_msql(query) + + value = parsed_output["conditions"][1]["value"][0] + assert value == "2.0*(X-55.9349375)", f"Expected parenthesized form, got: {value}" + + # Verify the expression evaluates correctly + from py_expression_eval import Parser + p = Parser() + result = p.parse(value).evaluate({"X": 100.0}) + expected = 2.0 * (100.0 - 55.9349375) + assert abs(result - expected) < 1e-6, f"Expected {expected}, got {result}" + def test_xrange_parse(): query = "QUERY scaninfo(MS2DATA) WHERE MS2PROD=X AND MS2PROD=2.0*(X - formula(Fe)) AND X=range(min=5, max=100)"