-
Notifications
You must be signed in to change notification settings - Fork 687
Support Dialect level precedence, update Postgres Dialect to match Postgres
#1360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,10 +44,13 @@ pub use self::redshift::RedshiftSqlDialect; | |
| pub use self::snowflake::SnowflakeDialect; | ||
| pub use self::sqlite::SQLiteDialect; | ||
| pub use crate::keywords; | ||
| use crate::parser::{Parser, ParserError, Precedence}; | ||
| use crate::parser::{Parser, ParserError}; | ||
|
|
||
| use crate::keywords::Keyword; | ||
| use crate::tokenizer::Token; | ||
| #[cfg(not(feature = "std"))] | ||
| use alloc::boxed::Box; | ||
| use log::debug; | ||
|
|
||
| /// Convenience check if a [`Parser`] uses a certain dialect. | ||
| /// | ||
|
|
@@ -300,17 +303,170 @@ pub trait Dialect: Debug + Any { | |
| // return None to fall back to the default behavior | ||
| None | ||
| } | ||
|
|
||
| /// Get the precedence of the next token | ||
| /// | ||
| /// Higher number => higher precedence | ||
| fn get_next_precedence_full(&self, parser: &Parser) -> Result<u8, ParserError> { | ||
| if let Some(precedence) = self.get_next_precedence(parser) { | ||
| return precedence; | ||
| } | ||
|
|
||
| let token = parser.peek_token(); | ||
| debug!("get_next_precedence() {:?}", token); | ||
| match token.token { | ||
| Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC), | ||
|
|
||
| Token::Word(w) if w.keyword == Keyword::AT => { | ||
| match ( | ||
| parser.peek_nth_token(1).token, | ||
| parser.peek_nth_token(2).token, | ||
| ) { | ||
| (Token::Word(w), Token::Word(w2)) | ||
| if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => | ||
| { | ||
| Ok(AT_TZ_PREC) | ||
| } | ||
| _ => Ok(UNKNOWN_PREC), | ||
| } | ||
| } | ||
|
|
||
| Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token { | ||
| // The precedence of NOT varies depending on keyword that | ||
| // follows it. If it is followed by IN, BETWEEN, or LIKE, | ||
| // it takes on the precedence of those tokens. Otherwise, it | ||
| // is not an infix operator, and therefore has zero | ||
| // precedence. | ||
| Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC), | ||
| _ => Ok(UNKNOWN_PREC), | ||
| }, | ||
| Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC), | ||
| Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC), | ||
| Token::Eq | ||
| | Token::Lt | ||
| | Token::LtEq | ||
| | Token::Neq | ||
| | Token::Gt | ||
| | Token::GtEq | ||
| | Token::DoubleEq | ||
| | Token::Tilde | ||
| | Token::TildeAsterisk | ||
| | Token::ExclamationMarkTilde | ||
| | Token::ExclamationMarkTildeAsterisk | ||
| | Token::DoubleTilde | ||
| | Token::DoubleTildeAsterisk | ||
| | Token::ExclamationMarkDoubleTilde | ||
| | Token::ExclamationMarkDoubleTildeAsterisk | ||
| | Token::Spaceship => Ok(EQ_PREC), | ||
| Token::Pipe => Ok(PIPE_PREC), | ||
| Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC), | ||
| Token::Ampersand => Ok(AMPERSAND_PREC), | ||
| Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC), | ||
| Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => { | ||
| Ok(MUL_DIV_MOD_OP_PREC) | ||
| } | ||
| Token::DoubleColon | ||
| | Token::ExclamationMark | ||
| | Token::LBracket | ||
| | Token::Overlap | ||
| | Token::CaretAt => Ok(DOUBLE_COLON_PREC), | ||
| // Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC), | ||
| Token::Arrow | ||
| | Token::LongArrow | ||
| | Token::HashArrow | ||
| | Token::HashLongArrow | ||
| | Token::AtArrow | ||
| | Token::ArrowAt | ||
| | Token::HashMinus | ||
| | Token::AtQuestion | ||
| | Token::AtAt | ||
| | Token::Question | ||
| | Token::QuestionAnd | ||
| | Token::QuestionPipe | ||
| | Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC), | ||
| _ => Ok(UNKNOWN_PREC), | ||
| } | ||
| } | ||
|
|
||
| /// Dialect-specific statement parser override | ||
| fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> { | ||
| // return None to fall back to the default behavior | ||
| None | ||
| } | ||
|
|
||
| fn precedence_numeric(&self, p: Precedence) -> u8 { | ||
| p.numeric() | ||
| /// The following precedence values are used directly by `Parser` or in dialects, | ||
| /// so have to be made public by the dialect. | ||
| fn prec_double_colon(&self) -> u8 { | ||
| DOUBLE_COLON_PREC | ||
| } | ||
|
|
||
| fn prec_mul_div_mod_op(&self) -> u8 { | ||
| MUL_DIV_MOD_OP_PREC | ||
| } | ||
|
|
||
| fn prec_plus_minus(&self) -> u8 { | ||
| PLUS_MINUS_PREC | ||
| } | ||
|
|
||
| fn prec_between(&self) -> u8 { | ||
| BETWEEN_PREC | ||
| } | ||
|
|
||
| fn prec_like(&self) -> u8 { | ||
| LIKE_PREC | ||
| } | ||
|
|
||
| fn prec_unary_not(&self) -> u8 { | ||
| UNARY_NOT_PREC | ||
| } | ||
|
|
||
| fn prec_unknown(&self) -> u8 { | ||
| UNKNOWN_PREC | ||
| } | ||
| } | ||
|
|
||
| // Define the lexical Precedence of operators. | ||
| // | ||
| // Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This statement really isn't true, hence I added `(APPROXIMATELY)`. We could rewrite it to "was originally inspired by" or something?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is fine |
||
| // higher number = higher precedence | ||
| // | ||
| // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator | ||
| // actually has higher precedence than addition. | ||
| // See <https://postgrespro.com/list/thread-id/2673331>. | ||
| const DOUBLE_COLON_PREC: u8 = 50; | ||
| const AT_TZ_PREC: u8 = 41; | ||
| const MUL_DIV_MOD_OP_PREC: u8 = 40; | ||
| const PLUS_MINUS_PREC: u8 = 30; | ||
| const XOR_PREC: u8 = 24; | ||
| const AMPERSAND_PREC: u8 = 23; | ||
| const CARET_PREC: u8 = 22; | ||
| const PIPE_PREC: u8 = 21; | ||
| const BETWEEN_PREC: u8 = 20; | ||
| const EQ_PREC: u8 = 20; | ||
| const LIKE_PREC: u8 = 19; | ||
| const IS_PREC: u8 = 17; | ||
| const PG_OTHER_PREC: u8 = 16; | ||
| const UNARY_NOT_PREC: u8 = 15; | ||
| const AND_PREC: u8 = 10; | ||
| const OR_PREC: u8 = 5; | ||
| const UNKNOWN_PREC: u8 = 0; | ||
|
|
||
| impl dyn Dialect { | ||
| #[inline] | ||
| pub fn is<T: Dialect>(&self) -> bool { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,15 +14,29 @@ use log::debug; | |
| use crate::ast::{CommentObject, Statement}; | ||
| use crate::dialect::Dialect; | ||
| use crate::keywords::Keyword; | ||
| use crate::parser::{Parser, ParserError, Precedence}; | ||
| use crate::parser::{Parser, ParserError}; | ||
| use crate::tokenizer::Token; | ||
|
|
||
| /// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/) | ||
| #[derive(Debug)] | ||
| pub struct PostgreSqlDialect {} | ||
|
|
||
| const DOUBLE_COLON_PREC: u8 = 140; | ||
| const BRACKET_PREC: u8 = 130; | ||
| const COLLATE_PREC: u8 = 120; | ||
| const AT_TZ_PREC: u8 = 110; | ||
| const CARET_PREC: u8 = 100; | ||
| const MUL_DIV_MOD_OP_PREC: u8 = 90; | ||
| const PLUS_MINUS_PREC: u8 = 80; | ||
| // there's no XOR operator in PostgreSQL, but support it here to avoid breaking tests | ||
| const XOR_PREC: u8 = 75; | ||
|
Comment on lines
+31
to
+32
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure what to do about this, if we remove
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah I think we should just leave it in unless there is a compelling reason to take it out |
||
| const PG_OTHER_PREC: u8 = 70; | ||
| const BETWEEN_LIKE_PREC: u8 = 60; | ||
| const EQ_PREC: u8 = 50; | ||
| const IS_PREC: u8 = 40; | ||
| const NOT_PREC: u8 = 30; | ||
| const AND_PREC: u8 = 20; | ||
| const OR_PREC: u8 = 10; | ||
|
|
||
| impl Dialect for PostgreSqlDialect { | ||
| fn identifier_quote_style(&self, _identifier: &str) -> Option<char> { | ||
|
|
@@ -75,14 +89,10 @@ impl Dialect for PostgreSqlDialect { | |
| let token = parser.peek_token(); | ||
| debug!("get_next_precedence() {:?}", token); | ||
|
|
||
| macro_rules! p { | ||
| ($precedence:ident) => {self.precedence_numeric(Precedence::$precedence)}; | ||
| } | ||
|
|
||
| let precedence = match token.token { | ||
| Token::Word(w) if w.keyword == Keyword::OR => p!(Or), | ||
| Token::Word(w) if w.keyword == Keyword::XOR => p!(Xor), | ||
| Token::Word(w) if w.keyword == Keyword::AND => p!(And), | ||
| Token::Word(w) if w.keyword == Keyword::OR => OR_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::AND => AND_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::AT => { | ||
| match ( | ||
| parser.peek_nth_token(1).token, | ||
|
|
@@ -91,9 +101,9 @@ impl Dialect for PostgreSqlDialect { | |
| (Token::Word(w), Token::Word(w2)) | ||
| if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => | ||
| { | ||
| p!(AtTz) | ||
| AT_TZ_PREC | ||
| } | ||
| _ => p!(Unknown), | ||
| _ => self.prec_unknown(), | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -103,25 +113,25 @@ impl Dialect for PostgreSqlDialect { | |
| // it takes on the precedence of those tokens. Otherwise, it | ||
| // is not an infix operator, and therefore has zero | ||
| // precedence. | ||
| Token::Word(w) if w.keyword == Keyword::IN => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::BETWEEN => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::LIKE => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::ILIKE => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::RLIKE => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::REGEXP => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::SIMILAR => p!(Between), | ||
| _ => p!(Unknown), | ||
| Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC, | ||
| _ => self.prec_unknown(), | ||
| }, | ||
| Token::Word(w) if w.keyword == Keyword::IS => p!(Is), | ||
| Token::Word(w) if w.keyword == Keyword::IN => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::BETWEEN => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::LIKE => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::ILIKE => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::RLIKE => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::REGEXP => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::SIMILAR => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::OPERATOR => p!(Between), | ||
| Token::Word(w) if w.keyword == Keyword::DIV => p!(MulDivModOp), | ||
| Token::Word(w) if w.keyword == Keyword::IS => IS_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC, | ||
| Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC, | ||
| Token::Eq | ||
| | Token::Lt | ||
|
|
@@ -138,13 +148,11 @@ impl Dialect for PostgreSqlDialect { | |
| | Token::DoubleTildeAsterisk | ||
| | Token::ExclamationMarkDoubleTilde | ||
| | Token::ExclamationMarkDoubleTildeAsterisk | ||
| | Token::Spaceship => p!(Eq), | ||
| Token::Pipe => p!(Pipe), | ||
| Token::Caret => p!(Caret), | ||
| Token::Ampersand => p!(Ampersand), | ||
| Token::Plus | Token::Minus => p!(PlusMinus), | ||
| Token::Mul | Token::Div | Token::Mod => p!(MulDivModOp), | ||
| Token::DoubleColon => p!(DoubleColon), | ||
| | Token::Spaceship => EQ_PREC, | ||
| Token::Caret => CARET_PREC, | ||
| Token::Plus | Token::Minus => PLUS_MINUS_PREC, | ||
| Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC, | ||
| Token::DoubleColon => DOUBLE_COLON_PREC, | ||
| Token::LBracket => BRACKET_PREC, | ||
| Token::Arrow | ||
| | Token::LongArrow | ||
|
|
@@ -165,8 +173,10 @@ impl Dialect for PostgreSqlDialect { | |
| | Token::Sharp | ||
| | Token::ShiftRight | ||
| | Token::ShiftLeft | ||
| | Token::CustomBinaryOperator(_) => p!(PgOther), | ||
| _ => p!(Unknown), | ||
| | Token::Pipe | ||
| | Token::Ampersand | ||
| | Token::CustomBinaryOperator(_) => PG_OTHER_PREC, | ||
| _ => self.prec_unknown(), | ||
| }; | ||
| Some(Ok(precedence)) | ||
| } | ||
|
|
@@ -187,42 +197,24 @@ impl Dialect for PostgreSqlDialect { | |
| true | ||
| } | ||
|
|
||
| /* | ||
| const DOUBLE_COLON_PREC: u8 = 140; | ||
| const BRACKET_PREC: u8 = 130; | ||
| const COLLATE_PREC: u8 = 120; | ||
| const AT_TZ_PREC: u8 = 110; | ||
| const CARET_PREC: u8 = 100; | ||
| const MUL_DIV_MOD_OP_PREC: u8 = 90; | ||
| const PLUS_MINUS_PREC: u8 = 80; | ||
| const PG_OTHER_PREC: u8 = 70; | ||
| const BETWEEN_LIKE_PREC: u8 = 60; | ||
| const EQ_PREC: u8 = 50; | ||
| const IS_PREC: u8 = 40; | ||
| const NOT_PREC: u8 = 30; | ||
| const AND_PREC: u8 = 20; | ||
| const OR_PREC: u8 = 10; | ||
| const UNKNOWN_PREC: u8 = 0; | ||
| */ | ||
| /// based on https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-PRECEDENCE | ||
| fn precedence_numeric(&self, p: Precedence) -> u8 { | ||
| match p { | ||
| Precedence::DoubleColon => 140, | ||
| Precedence::AtTz => 110, | ||
| Precedence::MulDivModOp => 90, | ||
| Precedence::PlusMinus => 80, | ||
| Precedence::Caret => 110, | ||
| Precedence::Between => 60, | ||
| Precedence::Eq => 50, | ||
| Precedence::Like => 60, | ||
| Precedence::Is => 40, | ||
| Precedence::PgOther | Precedence::Pipe | Precedence::Ampersand => 70, | ||
| Precedence::UnaryNot => 30, | ||
| Precedence::And => 20, | ||
| Precedence::Xor => 79, | ||
| Precedence::Or => 10, | ||
| Precedence::Unknown => 0, | ||
| } | ||
| fn prec_mul_div_mod_op(&self) -> u8 { | ||
| MUL_DIV_MOD_OP_PREC | ||
| } | ||
|
|
||
| fn prec_plus_minus(&self) -> u8 { | ||
| PLUS_MINUS_PREC | ||
| } | ||
|
|
||
| fn prec_between(&self) -> u8 { | ||
| BETWEEN_LIKE_PREC | ||
| } | ||
|
|
||
| fn prec_like(&self) -> u8 { | ||
| BETWEEN_LIKE_PREC | ||
| } | ||
|
|
||
| fn prec_unary_not(&self) -> u8 { | ||
| NOT_PREC | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are the precedence values that either
`Parser` needs to know about, or are used by implementations of the trait, e.g. `prec_double_colon` is used by `Snowflake`.