Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
simplify precedence logic, add tests
  • Loading branch information
samuelcolvin committed Aug 1, 2024
commit ac53f97b9b73876c018d73d2be7eac9852f04f64
162 changes: 159 additions & 3 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,13 @@ pub use self::redshift::RedshiftSqlDialect;
pub use self::snowflake::SnowflakeDialect;
pub use self::sqlite::SQLiteDialect;
pub use crate::keywords;
use crate::parser::{Parser, ParserError, Precedence};
use crate::parser::{Parser, ParserError};

use crate::keywords::Keyword;
use crate::tokenizer::Token;
#[cfg(not(feature = "std"))]
use alloc::boxed::Box;
use log::debug;

/// Convenience check if a [`Parser`] uses a certain dialect.
///
Expand Down Expand Up @@ -300,17 +303,170 @@ pub trait Dialect: Debug + Any {
// return None to fall back to the default behavior
None
}

/// Get the precedence of the next token
///
/// Higher number => higher precedence
fn get_next_precedence_full(&self, parser: &Parser) -> Result<u8, ParserError> {
if let Some(precedence) = self.get_next_precedence(parser) {
return precedence;
}

let token = parser.peek_token();
debug!("get_next_precedence() {:?}", token);
match token.token {
Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),

Token::Word(w) if w.keyword == Keyword::AT => {
match (
parser.peek_nth_token(1).token,
parser.peek_nth_token(2).token,
) {
(Token::Word(w), Token::Word(w2))
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
{
Ok(AT_TZ_PREC)
}
_ => Ok(UNKNOWN_PREC),
}
}

Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
// The precedence of NOT varies depending on keyword that
// follows it. If it is followed by IN, BETWEEN, or LIKE,
// it takes on the precedence of those tokens. Otherwise, it
// is not an infix operator, and therefore has zero
// precedence.
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
_ => Ok(UNKNOWN_PREC),
},
Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC),
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC),
Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC),
Token::Eq
| Token::Lt
| Token::LtEq
| Token::Neq
| Token::Gt
| Token::GtEq
| Token::DoubleEq
| Token::Tilde
| Token::TildeAsterisk
| Token::ExclamationMarkTilde
| Token::ExclamationMarkTildeAsterisk
| Token::DoubleTilde
| Token::DoubleTildeAsterisk
| Token::ExclamationMarkDoubleTilde
| Token::ExclamationMarkDoubleTildeAsterisk
| Token::Spaceship => Ok(EQ_PREC),
Token::Pipe => Ok(PIPE_PREC),
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC),
Token::Ampersand => Ok(AMPERSAND_PREC),
Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC),
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
Ok(MUL_DIV_MOD_OP_PREC)
}
Token::DoubleColon
| Token::ExclamationMark
| Token::LBracket
| Token::Overlap
| Token::CaretAt => Ok(DOUBLE_COLON_PREC),
// Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC),
Token::Arrow
| Token::LongArrow
| Token::HashArrow
| Token::HashLongArrow
| Token::AtArrow
| Token::ArrowAt
| Token::HashMinus
| Token::AtQuestion
| Token::AtAt
| Token::Question
| Token::QuestionAnd
| Token::QuestionPipe
| Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC),
_ => Ok(UNKNOWN_PREC),
}
}

/// Dialect-specific statement parser override.
///
/// A dialect can override this to parse a statement itself, returning
/// `Some(Ok(statement))` or `Some(Err(error))`. This default implementation
/// does not touch the parser and returns `None`, which signals the caller to
/// fall back to the default behavior.
fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
// return None to fall back to the default behavior
None
}

fn precedence_numeric(&self, p: Precedence) -> u8 {
p.numeric()
/// The following precedence values are used directly by `Parser` or in dialects,
/// so they have to be made public by the dialect.
fn prec_double_colon(&self) -> u8 {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these are the precedence values that either Parser needs to know about, or are used by implementations of the trait, e.g. prec_double_colon is used by Snowflake.

DOUBLE_COLON_PREC
}

/// Precedence of multiplicative operators.
///
/// Defaults to `MUL_DIV_MOD_OP_PREC`, the value `get_next_precedence_full`
/// assigns to `Token::Mul`, `Token::Div`, `Token::DuckIntDiv`, `Token::Mod`,
/// `Token::StringConcat` and the `DIV` keyword. Dialects with their own
/// precedence table should override this.
fn prec_mul_div_mod_op(&self) -> u8 {
MUL_DIV_MOD_OP_PREC
}

/// Precedence of additive operators.
///
/// Defaults to `PLUS_MINUS_PREC`, the value `get_next_precedence_full`
/// assigns to `Token::Plus` and `Token::Minus`. Dialects with their own
/// precedence table should override this.
fn prec_plus_minus(&self) -> u8 {
PLUS_MINUS_PREC
}

/// Precedence of `BETWEEN`-style operators.
///
/// Defaults to `BETWEEN_PREC`, the value `get_next_precedence_full` assigns
/// to the `IN`, `BETWEEN` and `OPERATOR` keywords (and to `NOT IN` /
/// `NOT BETWEEN`). Dialects with their own precedence table should
/// override this.
fn prec_between(&self) -> u8 {
BETWEEN_PREC
}

/// Precedence of pattern-matching keywords.
///
/// Defaults to `LIKE_PREC`, the value `get_next_precedence_full` assigns to
/// `LIKE`, `ILIKE`, `RLIKE`, `REGEXP` and `SIMILAR` (also when preceded by
/// `NOT`). Dialects with their own precedence table should override this.
fn prec_like(&self) -> u8 {
LIKE_PREC
}

/// Precedence of the unary (prefix) `NOT` operator.
///
/// Defaults to `UNARY_NOT_PREC`. This value is not produced by
/// `get_next_precedence_full`; presumably the parser uses it when parsing
/// the operand of a prefix `NOT` — NOTE(review): confirm against `Parser`.
fn prec_unary_not(&self) -> u8 {
UNARY_NOT_PREC
}

/// Precedence reported for tokens that are not infix operators.
///
/// Defaults to `UNKNOWN_PREC` (zero), the fallback value of
/// `get_next_precedence_full`.
fn prec_unknown(&self) -> u8 {
UNKNOWN_PREC
}
}

// Define the lexical Precedence of operators.
//
// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This statement really isn't true, hence I added APPROXIMATELY.

We could rewrite to "was originally inspired by" or something?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fine

// higher number = higher precedence
//
// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
// actually has higher precedence than addition.
// See <https://postgrespro.com/list/thread-id/2673331>.
// Default precedence table used by `get_next_precedence_full`, listed from
// highest to lowest precedence.

// `Token::DoubleColon`, `Token::ExclamationMark`, `Token::LBracket`,
// `Token::Overlap` and `Token::CaretAt`.
const DOUBLE_COLON_PREC: u8 = 50;
// `AT TIME ZONE`; deliberately one above the multiplicative operators.
const AT_TZ_PREC: u8 = 41;
// `Token::Mul`, `Token::Div`, `Token::DuckIntDiv`, `Token::Mod`,
// `Token::StringConcat` and the `DIV` keyword.
const MUL_DIV_MOD_OP_PREC: u8 = 40;
// `Token::Plus` and `Token::Minus`.
const PLUS_MINUS_PREC: u8 = 30;
// The `XOR` keyword.
const XOR_PREC: u8 = 24;
// `Token::Ampersand`.
const AMPERSAND_PREC: u8 = 23;
// `Token::Caret`, `Token::Sharp`, `Token::ShiftRight` and `Token::ShiftLeft`.
const CARET_PREC: u8 = 22;
// `Token::Pipe`.
const PIPE_PREC: u8 = 21;
// `IN`, `BETWEEN`, `OPERATOR` (and `NOT IN` / `NOT BETWEEN`).
// Note: shares its numeric value with `EQ_PREC`.
const BETWEEN_PREC: u8 = 20;
// Comparison and regex-match tokens (`Token::Eq`, `Token::Lt`, ...,
// `Token::Spaceship`).
const EQ_PREC: u8 = 20;
// `LIKE`, `ILIKE`, `RLIKE`, `REGEXP`, `SIMILAR` (also when preceded by `NOT`).
const LIKE_PREC: u8 = 19;
// The `IS` keyword.
const IS_PREC: u8 = 17;
// Arrow / JSON-style operators (`Token::Arrow`, `Token::LongArrow`, ...) and
// `Token::CustomBinaryOperator`.
const PG_OTHER_PREC: u8 = 16;
// Exposed through `Dialect::prec_unary_not`; not returned by
// `get_next_precedence_full` itself.
const UNARY_NOT_PREC: u8 = 15;
// The `AND` keyword.
const AND_PREC: u8 = 10;
// The `OR` keyword.
const OR_PREC: u8 = 5;
// Fallback for anything that is not an infix operator.
const UNKNOWN_PREC: u8 = 0;

impl dyn Dialect {
#[inline]
pub fn is<T: Dialect>(&self) -> bool {
Expand Down
138 changes: 65 additions & 73 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,29 @@ use log::debug;
use crate::ast::{CommentObject, Statement};
use crate::dialect::Dialect;
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError, Precedence};
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Token;

/// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/)
#[derive(Debug)]
pub struct PostgreSqlDialect {}

// PostgreSQL-specific precedence table used by this dialect's
// `get_next_precedence`, listed from highest to lowest precedence.

// `Token::DoubleColon`.
const DOUBLE_COLON_PREC: u8 = 140;
// `Token::LBracket`.
const BRACKET_PREC: u8 = 130;
// The `COLLATE` keyword.
const COLLATE_PREC: u8 = 120;
// `AT TIME ZONE`.
const AT_TZ_PREC: u8 = 110;
// `Token::Caret`.
const CARET_PREC: u8 = 100;
// `Token::Mul`, `Token::Div`, `Token::Mod` and the `DIV` keyword.
const MUL_DIV_MOD_OP_PREC: u8 = 90;
// `Token::Plus` and `Token::Minus`.
const PLUS_MINUS_PREC: u8 = 80;
// there's no XOR operator in PostgreSQL, but support it here to avoid breaking tests
const XOR_PREC: u8 = 75;
Comment on lines +31 to +32
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what to do about this, if we remove XOR logic from Postgres, one tests fails, but I guess people might well be using it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I think we should just leave it in unless there is a compelling reason to take it out

// All remaining native and custom binary operators: the arrow-style tokens,
// `Token::Sharp`, `Token::ShiftRight`, `Token::ShiftLeft`, `Token::Pipe`,
// `Token::Ampersand` and `Token::CustomBinaryOperator`.
const PG_OTHER_PREC: u8 = 70;
// `IN`, `BETWEEN`, `LIKE`, `ILIKE`, `RLIKE`, `REGEXP`, `SIMILAR` and
// `OPERATOR` (also the `NOT ...` forms of the first seven).
const BETWEEN_LIKE_PREC: u8 = 60;
// Comparison and regex-match tokens (`Token::Eq`, `Token::Lt`, ...,
// `Token::Spaceship`).
const EQ_PREC: u8 = 50;
// The `IS` keyword.
const IS_PREC: u8 = 40;
// Returned by `prec_unary_not` for this dialect.
const NOT_PREC: u8 = 30;
// The `AND` keyword.
const AND_PREC: u8 = 20;
// The `OR` keyword.
const OR_PREC: u8 = 10;

impl Dialect for PostgreSqlDialect {
fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
Expand Down Expand Up @@ -75,14 +89,10 @@ impl Dialect for PostgreSqlDialect {
let token = parser.peek_token();
debug!("get_next_precedence() {:?}", token);

macro_rules! p {
($precedence:ident) => {self.precedence_numeric(Precedence::$precedence)};
}

let precedence = match token.token {
Token::Word(w) if w.keyword == Keyword::OR => p!(Or),
Token::Word(w) if w.keyword == Keyword::XOR => p!(Xor),
Token::Word(w) if w.keyword == Keyword::AND => p!(And),
Token::Word(w) if w.keyword == Keyword::OR => OR_PREC,
Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC,
Token::Word(w) if w.keyword == Keyword::AND => AND_PREC,
Token::Word(w) if w.keyword == Keyword::AT => {
match (
parser.peek_nth_token(1).token,
Expand All @@ -91,9 +101,9 @@ impl Dialect for PostgreSqlDialect {
(Token::Word(w), Token::Word(w2))
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
{
p!(AtTz)
AT_TZ_PREC
}
_ => p!(Unknown),
_ => self.prec_unknown(),
}
}

Expand All @@ -103,25 +113,25 @@ impl Dialect for PostgreSqlDialect {
// it takes on the precedence of those tokens. Otherwise, it
// is not an infix operator, and therefore has zero
// precedence.
Token::Word(w) if w.keyword == Keyword::IN => p!(Between),
Token::Word(w) if w.keyword == Keyword::BETWEEN => p!(Between),
Token::Word(w) if w.keyword == Keyword::LIKE => p!(Between),
Token::Word(w) if w.keyword == Keyword::ILIKE => p!(Between),
Token::Word(w) if w.keyword == Keyword::RLIKE => p!(Between),
Token::Word(w) if w.keyword == Keyword::REGEXP => p!(Between),
Token::Word(w) if w.keyword == Keyword::SIMILAR => p!(Between),
_ => p!(Unknown),
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
_ => self.prec_unknown(),
},
Token::Word(w) if w.keyword == Keyword::IS => p!(Is),
Token::Word(w) if w.keyword == Keyword::IN => p!(Between),
Token::Word(w) if w.keyword == Keyword::BETWEEN => p!(Between),
Token::Word(w) if w.keyword == Keyword::LIKE => p!(Between),
Token::Word(w) if w.keyword == Keyword::ILIKE => p!(Between),
Token::Word(w) if w.keyword == Keyword::RLIKE => p!(Between),
Token::Word(w) if w.keyword == Keyword::REGEXP => p!(Between),
Token::Word(w) if w.keyword == Keyword::SIMILAR => p!(Between),
Token::Word(w) if w.keyword == Keyword::OPERATOR => p!(Between),
Token::Word(w) if w.keyword == Keyword::DIV => p!(MulDivModOp),
Token::Word(w) if w.keyword == Keyword::IS => IS_PREC,
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC,
Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC,
Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC,
Token::Eq
| Token::Lt
Expand All @@ -138,13 +148,11 @@ impl Dialect for PostgreSqlDialect {
| Token::DoubleTildeAsterisk
| Token::ExclamationMarkDoubleTilde
| Token::ExclamationMarkDoubleTildeAsterisk
| Token::Spaceship => p!(Eq),
Token::Pipe => p!(Pipe),
Token::Caret => p!(Caret),
Token::Ampersand => p!(Ampersand),
Token::Plus | Token::Minus => p!(PlusMinus),
Token::Mul | Token::Div | Token::Mod => p!(MulDivModOp),
Token::DoubleColon => p!(DoubleColon),
| Token::Spaceship => EQ_PREC,
Token::Caret => CARET_PREC,
Token::Plus | Token::Minus => PLUS_MINUS_PREC,
Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC,
Token::DoubleColon => DOUBLE_COLON_PREC,
Token::LBracket => BRACKET_PREC,
Token::Arrow
| Token::LongArrow
Expand All @@ -165,8 +173,10 @@ impl Dialect for PostgreSqlDialect {
| Token::Sharp
| Token::ShiftRight
| Token::ShiftLeft
| Token::CustomBinaryOperator(_) => p!(PgOther),
_ => p!(Unknown),
| Token::Pipe
| Token::Ampersand
| Token::CustomBinaryOperator(_) => PG_OTHER_PREC,
_ => self.prec_unknown(),
};
Some(Ok(precedence))
}
Expand All @@ -187,42 +197,24 @@ impl Dialect for PostgreSqlDialect {
true
}

/*
const DOUBLE_COLON_PREC: u8 = 140;
const BRACKET_PREC: u8 = 130;
const COLLATE_PREC: u8 = 120;
const AT_TZ_PREC: u8 = 110;
const CARET_PREC: u8 = 100;
const MUL_DIV_MOD_OP_PREC: u8 = 90;
const PLUS_MINUS_PREC: u8 = 80;
const PG_OTHER_PREC: u8 = 70;
const BETWEEN_LIKE_PREC: u8 = 60;
const EQ_PREC: u8 = 50;
const IS_PREC: u8 = 40;
const NOT_PREC: u8 = 30;
const AND_PREC: u8 = 20;
const OR_PREC: u8 = 10;
const UNKNOWN_PREC: u8 = 0;
*/
/// based on https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-PRECEDENCE
fn precedence_numeric(&self, p: Precedence) -> u8 {
match p {
Precedence::DoubleColon => 140,
Precedence::AtTz => 110,
Precedence::MulDivModOp => 90,
Precedence::PlusMinus => 80,
Precedence::Caret => 110,
Precedence::Between => 60,
Precedence::Eq => 50,
Precedence::Like => 60,
Precedence::Is => 40,
Precedence::PgOther | Precedence::Pipe | Precedence::Ampersand => 70,
Precedence::UnaryNot => 30,
Precedence::And => 20,
Precedence::Xor => 79,
Precedence::Or => 10,
Precedence::Unknown => 0,
}
/// PostgreSQL precedence of multiplicative operators: this file's
/// `MUL_DIV_MOD_OP_PREC` rather than the trait default.
fn prec_mul_div_mod_op(&self) -> u8 {
MUL_DIV_MOD_OP_PREC
}

/// PostgreSQL precedence of `+` / `-`: this file's `PLUS_MINUS_PREC` rather
/// than the trait default.
fn prec_plus_minus(&self) -> u8 {
PLUS_MINUS_PREC
}

/// PostgreSQL precedence of `BETWEEN`-style operators. In this dialect
/// `BETWEEN` and `LIKE` share one level, `BETWEEN_LIKE_PREC`.
fn prec_between(&self) -> u8 {
BETWEEN_LIKE_PREC
}

/// PostgreSQL precedence of `LIKE`-style operators. In this dialect
/// `LIKE` and `BETWEEN` share one level, `BETWEEN_LIKE_PREC`.
fn prec_like(&self) -> u8 {
BETWEEN_LIKE_PREC
}

/// PostgreSQL precedence of the unary `NOT` operator: this file's `NOT_PREC`
/// rather than the trait default.
fn prec_unary_not(&self) -> u8 {
NOT_PREC
}
}

Expand Down
Loading