From e1ee4ee187f9c2a63cd402127ad912308798ef16 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Thu, 8 Jan 2026 15:38:54 +0100 Subject: [PATCH 1/2] fix: qualified column names with SQL keywords parse as identifiers Since v0.55.0, qualified column names using SQL keywords failed to parse. For example, `T.interval` in `PARTITION BY T.key, T.interval ORDER BY ...` was incorrectly interpreted as an INTERVAL expression instead of a column. Commit `3e90a18` changed `parse_compound_expr` to use `parse_subexpr()` for tokens after `.`, causing keywords like INTERVAL, CASE, CAST, etc. to be treated as expression starters. Explicitly handle `Token::Word` in `parse_compound_expr` by treating it as an identifier. If followed by `(` (excluding the `(+)` outer join operator), parse as a method call. This restores the original behavior where words after `.` were always converted to identifiers. --- src/parser/mod.rs | 15 +++++++++++++ tests/sqlparser_common.rs | 45 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3294acf6a..7e0f7b090 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1856,6 +1856,21 @@ impl<'a> Parser<'a> { chain.push(AccessExpr::Dot(expr)); self.advance_token(); // The consumed string } + // Handle words (including keywords like INTERVAL) as identifiers + // when they appear after a period. This ensures `T.interval` is + // parsed as a compound identifier, not as an interval expression. + // If followed by `(`, parse as a method call (but not for `(+)` + // which is the outer join operator in some dialects). 
+ Token::Word(w) => { + let ident = w.clone().into_ident(next_token.span); + self.advance_token(); + if self.peek_token() == Token::LParen && !self.peek_outer_join_operator() { + let expr = self.parse_function(ObjectName::from(vec![ident]))?; + chain.push(AccessExpr::Dot(expr)); + } else { + chain.push(AccessExpr::Dot(Expr::Identifier(ident))); + } + } // Fallback to parsing an arbitrary expression. _ => match self.parse_subexpr(self.dialect.prec_value(Precedence::Period))? { // If we get back a compound field access or identifier, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 365bddb0f..7906db46a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15009,6 +15009,51 @@ fn test_reserved_keywords_for_identifiers() { dialects.parse_sql_statements(sql).unwrap(); } +#[test] +fn test_keywords_as_column_names_after_dot() { + // Test various keywords that have special meaning when standalone + // but should be treated as identifiers after a dot. + let keywords = [ + "interval", // INTERVAL '1' DAY + "case", // CASE WHEN ... END + "cast", // CAST(x AS y) + "extract", // EXTRACT(DAY FROM ...) + "trim", // TRIM(...) + "substring", // SUBSTRING(...) 
+ "left", // LEFT(str, n) + "right", // RIGHT(str, n) + ]; + + for kw in keywords { + let sql = format!("SELECT T.{kw} FROM T"); + verified_stmt(&sql); + + let sql = format!("SELECT SUM(x) OVER (PARTITION BY T.{kw} ORDER BY T.id) FROM T"); + verified_stmt(&sql); + + let sql = format!("SELECT T.{kw}, S.{kw} FROM T, S WHERE T.{kw} = S.{kw}"); + verified_stmt(&sql); + } + + let select = verified_only_select("SELECT T.interval, T.case FROM T"); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::CompoundIdentifier(idents)) => { + assert_eq!(idents.len(), 2); + assert_eq!(idents[0].value, "T"); + assert_eq!(idents[1].value, "interval"); + } + _ => panic!("Expected CompoundIdentifier for T.interval"), + } + match &select.projection[1] { + SelectItem::UnnamedExpr(Expr::CompoundIdentifier(idents)) => { + assert_eq!(idents.len(), 2); + assert_eq!(idents[0].value, "T"); + assert_eq!(idents[1].value, "case"); + } + _ => panic!("Expected CompoundIdentifier for T.case"), + } +} + #[test] fn parse_create_table_with_bit_types() { let sql = "CREATE TABLE t (a BIT, b BIT VARYING, c BIT(42), d BIT VARYING(43))"; From 4ea8e36a2b6df1bf883e91e6d2645a67cdc7b106 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Thu, 22 Jan 2026 13:40:37 +0100 Subject: [PATCH 2/2] Use single catch-all branch and check expr --- src/parser/mod.rs | 78 +++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7e0f7b090..c6df51fcc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1856,41 +1856,53 @@ impl<'a> Parser<'a> { chain.push(AccessExpr::Dot(expr)); self.advance_token(); // The consumed string } - // Handle words (including keywords like INTERVAL) as identifiers - // when they appear after a period. This ensures `T.interval` is - // parsed as a compound identifier, not as an interval expression. 
- // If followed by `(`, parse as a method call (but not for `(+)` - // which is the outer join operator in some dialects). - Token::Word(w) => { - let ident = w.clone().into_ident(next_token.span); - self.advance_token(); - if self.peek_token() == Token::LParen && !self.peek_outer_join_operator() { - let expr = self.parse_function(ObjectName::from(vec![ident]))?; - chain.push(AccessExpr::Dot(expr)); - } else { - chain.push(AccessExpr::Dot(Expr::Identifier(ident))); + // Fallback to parsing an arbitrary expression, but restrict to expression + // types that are valid after the dot operator. This ensures that e.g. + // `T.interval` is parsed as a compound identifier, not as an interval + // expression. + _ => { + let expr = self.maybe_parse(|parser| { + let expr = parser + .parse_subexpr(parser.dialect.prec_value(Precedence::Period))?; + match &expr { + Expr::CompoundFieldAccess { .. } + | Expr::CompoundIdentifier(_) + | Expr::Identifier(_) + | Expr::Value(_) + | Expr::Function(_) => Ok(expr), + _ => parser.expected("an identifier or value", parser.peek_token()), + } + })?; + + match expr { + // If we get back a compound field access or identifier, + // we flatten the nested expression. + // For example if the current root is `foo` + // and we get back a compound identifier expression `bar.baz` + // The full expression should be `foo.bar.baz` (i.e. + // a root with an access chain with 2 entries) and not + // `foo.(bar.baz)` (i.e. a root with an access chain with + // 1 entry`). + Some(Expr::CompoundFieldAccess { root, access_chain }) => { + chain.push(AccessExpr::Dot(*root)); + chain.extend(access_chain); + } + Some(Expr::CompoundIdentifier(parts)) => chain.extend( + parts.into_iter().map(Expr::Identifier).map(AccessExpr::Dot), + ), + Some(expr) => { + chain.push(AccessExpr::Dot(expr)); + } + // If the expression is not a valid suffix, fall back to + // parsing as an identifier. 
This handles cases like `T.interval` + // where `interval` is a keyword but should be treated as an identifier. + None => { + chain.push(AccessExpr::Dot(Expr::Identifier( + self.parse_identifier()?, + ))); + } } } - // Fallback to parsing an arbitrary expression. - _ => match self.parse_subexpr(self.dialect.prec_value(Precedence::Period))? { - // If we get back a compound field access or identifier, - // we flatten the nested expression. - // For example if the current root is `foo` - // and we get back a compound identifier expression `bar.baz` - // The full expression should be `foo.bar.baz` (i.e. - // a root with an access chain with 2 entries) and not - // `foo.(bar.baz)` (i.e. a root with an access chain with - // 1 entry`). - Expr::CompoundFieldAccess { root, access_chain } => { - chain.push(AccessExpr::Dot(*root)); - chain.extend(access_chain); - } - Expr::CompoundIdentifier(parts) => chain - .extend(parts.into_iter().map(Expr::Identifier).map(AccessExpr::Dot)), - expr => { - chain.push(AccessExpr::Dot(expr)); - } - }, } } else if !self.dialect.supports_partiql() && self.peek_token_ref().token == Token::LBracket