diff --git a/documentation/sql/materialized_views.md b/documentation/sql/materialized_views.md index 668e178a..56c96824 100644 --- a/documentation/sql/materialized_views.md +++ b/documentation/sql/materialized_views.md @@ -473,11 +473,23 @@ DROP MATERIALIZED VIEW IF EXISTS orders_with_customers_mv; | Limitation | Details | |---------------------------------------------|----------------------------------------------------------------------| | **UNNEST JOIN** | Not supported in materialized views | +| **`RIGHT JOIN` / `FULL OUTER JOIN`** | Not supported (see below). For `RIGHT JOIN`, use `LEFT JOIN` with swapped table order. | | **Quota limits** | Community edition: max 3 views. Pro: limited. Enterprise: unlimited | | **Watcher dependency (ES license)** | Automatic enrich policy re-execution relies on Elasticsearch Watchers, which require an Elasticsearch Platinum or Enterprise license (see below) | | **Eventual consistency** | Data is eventually consistent based on refresh frequency and delay | | **Join cardinality** | JOINs use enrich policies which match on a single field | +### Supported JOIN types + +Only `INNER JOIN` and `LEFT JOIN` (`LEFT OUTER JOIN`) are supported for materialized views. + +The MV's ingest pipeline is driven by **writes to the main (left-hand) `FROM` table** — every joined table is enriched into the main-table document via an `EnrichProcessor`. There is no mechanism for the pipeline to fire from the right-hand side, so: + +- **`RIGHT JOIN A ON A.x = B.y`** cannot preserve unmatched rows of the joined table when no matching main-table row triggers the pipeline. Rewrite the query with the right-hand table as the main `FROM` table and use `LEFT JOIN`. +- **`FULL OUTER JOIN`** needs to preserve rows from both sides, which the single-direction enrichment pipeline cannot do. + +Attempting to create a materialized view with `RIGHT JOIN` or `FULL OUTER JOIN` fails at creation time with an actionable error message; no partial artifacts are deployed. 
+ ### Watcher Dependency and Elasticsearch Licensing Materialized views with JOINs rely on **enrich policies** to denormalize data from lookup tables into the view. When data in a lookup table (e.g. `customers`) changes, the corresponding enrich policy must be **re-executed** so that new documents flowing through the ingest pipeline pick up the updated values. diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala index 8618ef42..4566ccbc 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/From.scala @@ -37,11 +37,21 @@ sealed trait JoinType extends TokenRegex case object InnerJoin extends Expr("INNER") with JoinType -case object LeftJoin extends Expr("LEFT") with JoinType +// ANSI SQL allows the optional `OUTER` keyword after LEFT / RIGHT / FULL — `LEFT OUTER JOIN` +// is the same as `LEFT JOIN`. The `words` override extends the parser-side regex to accept +// both forms while keeping the SQL round-trip rendering (`sql` value) on the short form. +// Longer alternatives come first: regex alternation takes the first match, not the longest. 
+case object LeftJoin extends Expr("LEFT") with JoinType { + override def words: List[String] = List("LEFT\\s+OUTER", "LEFT") +} -case object RightJoin extends Expr("RIGHT") with JoinType +case object RightJoin extends Expr("RIGHT") with JoinType { + override def words: List[String] = List("RIGHT\\s+OUTER", "RIGHT") +} -case object FullJoin extends Expr("FULL") with JoinType +case object FullJoin extends Expr("FULL") with JoinType { + override def words: List[String] = List("FULL\\s+OUTER", "FULL") +} case object CrossJoin extends Expr("CROSS") with JoinType @@ -245,11 +255,6 @@ case class StandardJoin( override def validate(): Either[String, Unit] = { for { - _ <- joinType match { - case Some(InnerJoin | LeftJoin) => Right(()) - case None => Right(()) // by default INNER JOIN - case _ => Left(s"Standard JOIN $this requires an INNER (default) or LEFT JOIN type") - } _ <- on match { case Some(o) => o.validate() case None => Left(s"Standard JOIN $this requires an ON clause") diff --git a/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala b/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala index 9733bc3a..0ee5ad4b 100644 --- a/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala +++ b/sql/src/test/scala/app/softnetwork/elastic/sql/parser/ParserSpec.scala @@ -3072,6 +3072,40 @@ class ParserSpec extends AnyFlatSpec with Matchers { } } + // ── Optional OUTER keyword in LEFT/RIGHT/FULL OUTER JOIN (Issue #94) ─────── + + private def firstStandardJoinType(sql: String): Option[JoinType] = { + Parser(sql).toOption.get + .asInstanceOf[SingleSearch] + .from + .mainTable + .joins + .head + .asInstanceOf[StandardJoin] + .joinType + } + + it should "parse LEFT JOIN and LEFT OUTER JOIN identically" in { + val short = "SELECT * FROM orders o LEFT JOIN customers c ON o.customer_id = c.id" + val long = "SELECT * FROM orders o LEFT OUTER JOIN customers c ON o.customer_id = c.id" + firstStandardJoinType(short) shouldBe 
Some(LeftJoin) + firstStandardJoinType(long) shouldBe Some(LeftJoin) + } + + it should "parse RIGHT JOIN and RIGHT OUTER JOIN identically" in { + val short = "SELECT * FROM orders o RIGHT JOIN customers c ON o.customer_id = c.id" + val long = "SELECT * FROM orders o RIGHT OUTER JOIN customers c ON o.customer_id = c.id" + firstStandardJoinType(short) shouldBe Some(RightJoin) + firstStandardJoinType(long) shouldBe Some(RightJoin) + } + + it should "parse FULL JOIN and FULL OUTER JOIN identically" in { + val short = "SELECT * FROM orders o FULL JOIN customers c ON o.customer_id = c.id" + val long = "SELECT * FROM orders o FULL OUTER JOIN customers c ON o.customer_id = c.id" + firstStandardJoinType(short) shouldBe Some(FullJoin) + firstStandardJoinType(long) shouldBe Some(FullJoin) + } + behavior of "Parser Cluster" it should "parse SHOW CLUSTER NAME" in {