From 377dfa084cd5c38cb3a97460bc86a80b1135a0cc Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 10 Sep 2021 08:55:28 +0200 Subject: [PATCH 01/61] Switch back to development mode. --- CHANGELOG | 6 ++++++ sqlparse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 65e03fce..653ff4d1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +Development Version +------------------- + +Nothing yet. + + Release 0.4.2 (Sep 10, 2021) ---------------------------- diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index 9cab9d2c..33be0eee 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.2' +__version__ = '0.4.3.dev0' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] From 815040ebd85374c0a6e91fba1445c5f2425f0001 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 10 Sep 2021 09:15:56 +0200 Subject: [PATCH 02/61] Create python-app.yml --- .github/workflows/python-app.yml | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/python-app.yml diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 00000000..e09995db --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,39 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python application + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.5, 3.6, 3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest From d5165aef8187c9a8560905a647780a0b9f6d7b7f Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 10 Sep 2021 09:24:23 +0200 Subject: [PATCH 03/61] Update github action. --- .github/workflows/python-app.yml | 4 ++-- README.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index e09995db..06592365 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -31,9 +31,9 @@ jobs: - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics sqlparse # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics sqlparse - name: Test with pytest run: | pytest diff --git a/README.rst b/README.rst index 92e15c18..a8408869 100644 --- a/README.rst +++ b/README.rst @@ -67,8 +67,8 @@ sqlparse is licensed under the BSD license. Parts of the code are based on pygments written by Georg Brandl and others. pygments-Homepage: http://pygments.org/ -.. |buildstatus| image:: https://secure.travis-ci.org/andialbrecht/sqlparse.png?branch=master -.. _buildstatus: https://travis-ci.org/#!/andialbrecht/sqlparse +.. |buildstatus| image:: https://github.com/andialbrecht/sqlparse/actions/workflows/python-app.yml/badge.svg +.. _buildstatus: https://github.com/andialbrecht/sqlparse/actions/workflows/python-app.yml .. |coverage| image:: https://codecov.io/gh/andialbrecht/sqlparse/branch/master/graph/badge.svg .. _coverage: https://codecov.io/gh/andialbrecht/sqlparse .. |docs| image:: https://readthedocs.org/projects/sqlparse/badge/?version=latest From 611c91d58342593d31136c84d12576ca5367b32c Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 10 Sep 2021 09:29:10 +0200 Subject: [PATCH 04/61] CI: Publish code coverage. --- .github/workflows/python-app.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 06592365..c9590879 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -26,7 +26,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest + pip install flake8 pytest pytest-cov codecov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Lint with flake8 run: | @@ -36,4 +36,7 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics sqlparse - name: Test with pytest run: | - pytest + pytest --cov=sqlparse + - name: Publish to codecov + run: | + codecov From c9bcd0bdb3c9275b5e8312278ec59cfa7b6e9f1f Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 10 Sep 2021 09:30:28 +0200 Subject: [PATCH 05/61] CI: Fix flake8 command. --- .github/workflows/python-app.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index c9590879..82047464 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -31,9 +31,9 @@ jobs: - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics sqlparse + flake8 sqlparse --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics sqlparse + flake8 sqlparse --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics sqlparse - name: Test with pytest run: | pytest --cov=sqlparse From 8538af5a706d128fd8bfb6fd4ff5c034e8334ea4 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 10 Sep 2021 09:37:02 +0200 Subject: [PATCH 06/61] CI: ..and also fix the other flake8 command. 
--- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 82047464..b23198d7 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -33,7 +33,7 @@ jobs: # stop the build if there are Python syntax errors or undefined names flake8 sqlparse --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 sqlparse --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics sqlparse + flake8 sqlparse --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | pytest --cov=sqlparse From c7dfe286c25945000275cf9cda9721b8b181b3d7 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 10 Sep 2021 09:41:27 +0200 Subject: [PATCH 07/61] CI: Remove Travis configuration. --- .travis.yml | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b9774a23..00000000 --- a/.travis.yml +++ /dev/null @@ -1,29 +0,0 @@ -language: python -python: - - "3.5" - - "3.6" - - "3.7" - - "3.8" - - "3.9-dev" - - "nightly" - - "pypy3" - -matrix: - include: - - stage: codecheck - python: 3.8 - install: - - pip install flake8 - script: - - flake8 sqlparse - after_success: skip - -install: - - pip install -Uq pytest pytest-cov codecov - - pytest --version - -script: - - pytest --cov=sqlparse - -after_success: - - codecov From 3bc7d93254cbef71bdef91905f1814201a1b1f02 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 10 Sep 2021 14:19:19 +0200 Subject: [PATCH 08/61] Create codeql-analysis.yml --- .github/workflows/codeql-analysis.yml | 71 +++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 .github/workflows/codeql-analysis.yml diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000..82f63a6a --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,71 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ master ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ master ] + schedule: + - cron: '25 5 * * 1' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] + # Learn more: + # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + # Initializes the CodeQL tools for scanning. 
+ - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 From 83e5381fc320f06f932d10bc0691ad970ef7962f Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Thu, 28 Apr 2022 16:38:27 +0200 Subject: [PATCH 09/61] CHANGELOG: Fix typo --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 653ff4d1..fb66ab5e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -84,7 +84,7 @@ Bug Fixes * Remove support for parsing double slash comments introduced in 0.3.0 (issue456) as it had some side-effects with other dialects and doesn't seem to be widely used (issue476). -* Restrict detection of alias names to objects that acutally could +* Restrict detection of alias names to objects that actually could have an alias (issue455, adopted some parts of pr509 by john-bodley). * Fix parsing of date/time literals (issue438, by vashek). * Fix initialization of TokenList (issue499, pr505 by john-bodley). From 6c409fc1fa5e9ed0913220f5af81770498124df1 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 7 Aug 2022 08:57:35 +0200 Subject: [PATCH 10/61] Cleanup whitespace in README. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index a8408869..13ec7b16 100644 --- a/README.rst +++ b/README.rst @@ -59,7 +59,7 @@ Documentation https://sqlparse.readthedocs.io/ Online Demo - https://sqlformat.org/ + https://sqlformat.org/ sqlparse is licensed under the BSD license. 
From 538eea5a9e2f8d487cfc2d900310a98507cf2b37 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Fri, 29 Apr 2022 09:38:14 +0200 Subject: [PATCH 11/61] GitHub Actions: Add Py3.10, update Actions, more flake8 tests "3.10" must be quoted in yaml https://dev.to/hugovk/the-python-3-1-problem-85g --- .github/workflows/python-app.yml | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index b23198d7..281a4b74 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -15,28 +15,22 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7, 3.8, 3.9] + python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest pytest-cov codecov + pip install codecov flake8 pytest pytest-cov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 sqlparse --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 sqlparse --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + run: flake8 sqlparse --count --max-complexity=31 --max-line-length=88 --show-source --statistics - name: Test with pytest - run: | - pytest --cov=sqlparse + run: pytest --cov=sqlparse - name: Publish to codecov - run: | - codecov + run: codecov From 66e651a5f38c6d7a9872eb86b1dbeb451495a9a2 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Thu, 4 Aug 2022 10:52:10 +0200 Subject: [PATCH 12/61] Test on Python 3.11 beta 5 --- .github/workflows/python-app.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 281a4b74..337e7400 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -15,12 +15,12 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10"] + python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11-dev"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies From 83f905841f2b2aef4a92f77a6b7b78d45646acf7 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 7 Aug 2022 09:04:58 +0200 Subject: [PATCH 13/61] Reduce max line length in flake8 to the default of 79. 
--- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 337e7400..4f165859 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -29,7 +29,7 @@ jobs: pip install codecov flake8 pytest pytest-cov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Lint with flake8 - run: flake8 sqlparse --count --max-complexity=31 --max-line-length=88 --show-source --statistics + run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics - name: Test with pytest run: pytest --cov=sqlparse - name: Publish to codecov From c504b4ae278f84f051c6437fba5639f4c894fffd Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 7 Aug 2022 09:07:27 +0200 Subject: [PATCH 14/61] Update authors and changelog. --- AUTHORS | 1 + CHANGELOG | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 2e31ae0d..bd3d9b82 100644 --- a/AUTHORS +++ b/AUTHORS @@ -16,6 +16,7 @@ Alphabetical list of contributors: * atronah * casey * Cauê Beloni +* Christian Clauss * circld * Corey Zumar * Cristian Orellana diff --git a/CHANGELOG b/CHANGELOG index fb66ab5e..ab624fdc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,9 @@ Development Version ------------------- -Nothing yet. +Other + +* Update github actions to test on Python 3.10 as well (pr661, by cclaus). Release 0.4.2 (Sep 10, 2021) From 9bf2ec40949e328bfcf993e7e3f9620b0c3bc6bf Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sun, 7 Aug 2022 12:53:00 +0200 Subject: [PATCH 15/61] Update actions in codeql workflow. --- .github/workflows/codeql-analysis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 82f63a6a..1cde398b 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -39,11 +39,11 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -54,7 +54,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v2 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -68,4 +68,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v2 From e58781dd632584c783ef082257309ea5efdb6750 Mon Sep 17 00:00:00 2001 From: osmnv <80402144+osmnv@users.noreply.github.com> Date: Mon, 17 Jan 2022 12:47:16 +0300 Subject: [PATCH 16/61] Fix INDICATOR keyword --- sqlparse/keywords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 68506282..cd17d63d 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -314,7 +314,7 @@ def is_keyword(value): 'INCREMENT': tokens.Keyword, 'INDEX': tokens.Keyword, - 'INDITCATOR': tokens.Keyword, + 'INDICATOR': tokens.Keyword, 'INFIX': tokens.Keyword, 'INHERITS': tokens.Keyword, 'INITIAL': tokens.Keyword, From 402d6e977104a24471097291fd3041e76402a436 Mon Sep 17 00:00:00 2001 From: Hao Xin Date: Fri, 31 Dec 2021 15:14:25 +0800 Subject: [PATCH 17/61] docs: Add a PiPY package badge --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 13ec7b16..df4e7e36 100644 --- a/README.rst +++ b/README.rst @@ -4,6 +4,7 @@ python-sqlparse - Parse SQL statements |buildstatus|_ |coverage|_ |docs|_ +|packageversion|_ .. docincludebegin @@ -73,3 +74,5 @@ pygments-Homepage: http://pygments.org/ .. _coverage: https://codecov.io/gh/andialbrecht/sqlparse .. |docs| image:: https://readthedocs.org/projects/sqlparse/badge/?version=latest .. _docs: https://sqlparse.readthedocs.io/en/latest/?badge=latest +.. |packageversion| image:: https://img.shields.io/pypi/v/sqlparse?color=%2334D058&label=pypi%20package +.. _packageversion: https://pypi.org/project/sqlparse From 049634a6e6d4d06483f85866d300263672ff68bf Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Mon, 8 Aug 2022 07:52:46 +0200 Subject: [PATCH 18/61] Acknowledge support for Python 3.10 A subset of #645 Follow on to #661 --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index a9127c01..0843b704 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,6 +20,7 @@ classifiers = Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy Topic :: Database From 36687995cfd8d2f674c31f967512a6b17858ca0f Mon Sep 17 00:00:00 2001 From: Aki Ariga Date: Tue, 3 May 2022 20:27:17 -0700 Subject: [PATCH 19/61] DIV is Operator --- sqlparse/keywords.py | 1 + tests/test_parse.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index cd17d63d..f2c24fb1 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -241,6 +241,7 @@ def is_keyword(value): 'DISABLE': tokens.Keyword, 'DISCONNECT': tokens.Keyword, 'DISPATCH': tokens.Keyword, + 'DIV': tokens.Operator, 'DO': tokens.Keyword, 'DOMAIN': tokens.Keyword, 'DYNAMIC': tokens.Keyword, diff --git a/tests/test_parse.py b/tests/test_parse.py index 513b4be9..caba537e 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -132,6 +132,12 @@ def test_parse_nested_function(): assert type(t[0]) is sql.Function +def test_parse_div_operator(): + p = sqlparse.parse('col1 DIV 5 AS div_col1')[0].tokens + assert p[0].tokens[0].tokens[2].ttype is T.Operator + assert p[0].get_alias() == 'div_col1' + + def test_quoted_identifier(): t = sqlparse.parse('select x.y as 
"z" from foo')[0].tokens assert isinstance(t[2], sql.Identifier) From 4073b569428036259a093dcbfe0f67ac3ef3a1e0 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 22 Sep 2021 08:29:50 +0200 Subject: [PATCH 20/61] added some builtin types that can appear in Spark create table statements. --- sqlparse/keywords.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index f2c24fb1..88631bb4 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -908,6 +908,7 @@ def is_keyword(value): 'INLINE': tokens.Keyword, 'INSTR': tokens.Keyword, 'LEN': tokens.Keyword, + 'MAP': tokens.Name.Builtin, 'MAXELEMENT': tokens.Keyword, 'MAXINDEX': tokens.Keyword, 'MAX_PART_DATE': tokens.Keyword, @@ -939,9 +940,12 @@ def is_keyword(value): 'SQRT': tokens.Keyword, 'STACK': tokens.Keyword, 'STR': tokens.Keyword, + 'STRING': tokens.Name.Builtin, + 'STRUCT': tokens.Name.Builtin, 'SUBSTR': tokens.Keyword, 'SUMMARY': tokens.Keyword, 'TBLPROPERTIES': tokens.Keyword, + 'TIMESTAMP': tokens.Name.Builtin, 'TIMESTAMP_ISO': tokens.Keyword, 'TO_CHAR': tokens.Keyword, 'TO_DATE': tokens.Keyword, From a3e19f1cbdf929eb45dc0e22d116b466d7b985a1 Mon Sep 17 00:00:00 2001 From: Daniel Harding Date: Wed, 19 May 2021 13:31:19 +0300 Subject: [PATCH 21/61] Don't make slice copies in TokenList._token_matching(). Since we are working with indexes anyway, don't bother calling enumerate() with a slice from self.tokens (which requires copying memory). Instead, just generate the indexes using range() and use normal indexing to access the desired tokens. The old behavior resulted in quadratic runtime with respect to the number of tokens, which significantly impacted performance for statements with very large numbers of tokens. With the new behavior, the runtime is now linear with respect to the number of tokens. --- sqlparse/sql.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 6a32c26a..19c2ed43 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -240,7 +240,10 @@ def _token_matching(self, funcs, start=0, end=None, reverse=False): if func(token): return idx, token else: - for idx, token in enumerate(self.tokens[start:end], start=start): + if end is None: + end = len(self.tokens) + for idx in range(start, end): + token = self.tokens[idx] for func in funcs: if func(token): return idx, token From 5ab834471cf66f718536b34c6b65caab54b01793 Mon Sep 17 00:00:00 2001 From: Daniel Harding Date: Wed, 19 May 2021 13:46:19 +0300 Subject: [PATCH 22/61] Refactor to reduce redundant code. 
--- sqlparse/sql.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 19c2ed43..586cd216 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -234,19 +234,16 @@ def _token_matching(self, funcs, start=0, end=None, reverse=False): if reverse: assert end is None - for idx in range(start - 2, -1, -1): - token = self.tokens[idx] - for func in funcs: - if func(token): - return idx, token + indexes = range(start - 2, -1, -1) else: if end is None: end = len(self.tokens) - for idx in range(start, end): - token = self.tokens[idx] - for func in funcs: - if func(token): - return idx, token + indexes = range(start, end) + for idx in indexes: + token = self.tokens[idx] + for func in funcs: + if func(token): + return idx, token return None, None def token_first(self, skip_ws=True, skip_cm=False): From 9d2cb6fc950386e9e59f29faf0d3742c4b12572c Mon Sep 17 00:00:00 2001 From: JavierPan Date: Wed, 10 Nov 2021 12:48:18 +0800 Subject: [PATCH 23/61] Add regex pattern to identify REGEXP as a Compasion token --- sqlparse/keywords.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 88631bb4..29f27af8 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -90,6 +90,7 @@ def is_keyword(value): tokens.Keyword), (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast), (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison), + (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword), (r'[;:()\[\],\.]', tokens.Punctuation), (r'[<>=~!]+', tokens.Operator.Comparison), From 3d3df9dcfb68dc3680daa81ac0f747bb3703ad57 Mon Sep 17 00:00:00 2001 From: Erik Cederstrand Date: Tue, 16 Aug 2022 13:49:36 +0200 Subject: [PATCH 24/61] Make tzcast grouping function less eager --- sqlparse/engine/grouping.py | 11 +++++++++-- tests/test_regressions.py | 9 +++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 175ae8e5..2fb0a4ce 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -91,13 +91,20 @@ def group_tzcasts(tlist): def match(token): return token.ttype == T.Keyword.TZCast - def valid(token): + def valid_prev(token): return token is not None + def valid_next(token): + return token is not None and ( + token.is_whitespace + or token.match(T.Keyword, 'AS') + or token.match(*sql.TypedLiteral.M_CLOSE) + ) + def post(tlist, pidx, tidx, nidx): return pidx, nidx - _group(tlist, sql.Identifier, match, valid, valid, post) + _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) def group_typed_literal(tlist): diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 38d18404..4ffc69f3 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -401,6 +401,15 @@ def test_issue489_tzcasts(): assert p.tokens[-1].get_alias() == 'foo' +def test_issue562_tzcasts(): + # Test that whitespace between 'from' and 'bar' is retained + formatted = sqlparse.format( + 'SELECT f(HOUR from bar AT TIME ZONE \'UTC\') from foo', reindent=True + ) + assert formatted == \ + 'SELECT f(HOUR\n from bar AT TIME ZONE \'UTC\')\nfrom foo' + + def test_as_in_parentheses_indents(): # did raise NoneType has no attribute is_group in _process_parentheses formatted = sqlparse.format('(as foo)', reindent=True) From 507d3cf75737ddbf3553e82f73d00a1fd6303fb0 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 16 Aug 2022 15:53:07 +0200 Subject: [PATCH 
25/61] Update authors and changelog. --- AUTHORS | 6 ++++++ CHANGELOG | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/AUTHORS b/AUTHORS index bd3d9b82..e1e20895 100644 --- a/AUTHORS +++ b/AUTHORS @@ -8,6 +8,7 @@ project: https://bitbucket.org/gutworth/six. Alphabetical list of contributors: * Adam Greenhall +* Aki Ariga * Alexander Beedie * Alexey Malyshev * ali-tny @@ -21,16 +22,19 @@ Alphabetical list of contributors: * Corey Zumar * Cristian Orellana * Dag Wieers +* Daniel Harding * Darik Gamble * Demetrio92 * Dennis Taylor * Dvořák Václav +* Erik Cederstrand * Florian Bauer * Fredy Wijaya * Gavin Wahl * hurcy * Ian Robertson * JacekPliszka +* JavierPan * Jean-Martin Archer * Jesús Leganés Combarro "Piranna" * Johannes Hoff @@ -45,6 +49,7 @@ Alphabetical list of contributors: * Mike Amy * mulos * Oleg Broytman +* osmnv <80402144+osmnv@users.noreply.github.com> * Patrick Schemitz * Pi Delport * Prudhvi Vatala @@ -56,6 +61,7 @@ Alphabetical list of contributors: * Ryan Wooden * saaj * Shen Longxing +* Simon Heisterkamp * Sjoerd Job Postmus * Soloman Weng * spigwitmer diff --git a/CHANGELOG b/CHANGELOG index ab624fdc..8ebb6e32 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,18 @@ Development Version ------------------- +Enhancements + +* Add support for DIV operator (pr664, by chezou). +* Add support for additional SPARK keywords (pr643, by mrmasterplan). +* Avoid tokens copy (pr622, by living180). +* Add REGEXP as a comparision (pr647, by PeterSandwich). + +Bug Fixes + +* Fix spelling of INDICATOR keyword (pr653, by ptld). +* Fix formatting error in EXTRACT function (issue562, pr676, by ecederstrand). + Other * Update github actions to test on Python 3.10 as well (pr661, by cclaus). From 0bbfd5f8326fd4291c390858ba8ff482ff5ced70 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Wed, 24 Aug 2022 20:42:01 +0200 Subject: [PATCH 26/61] Add DISTINCTROW keyword (fixes #677). --- CHANGELOG | 1 + sqlparse/keywords.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 8ebb6e32..10185b2b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,7 @@ Enhancements * Add support for additional SPARK keywords (pr643, by mrmasterplan). * Avoid tokens copy (pr622, by living180). * Add REGEXP as a comparision (pr647, by PeterSandwich). +* Add DISTINCTROW keyword for MS Access (issue677). Bug Fixes diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 29f27af8..0f3a4596 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -16,6 +16,7 @@ def is_keyword(value): or KEYWORDS_ORACLE.get(val) or KEYWORDS_PLPGSQL.get(val) or KEYWORDS_HQL.get(val) + or KEYWORDS_MSACCESS.get(val) or KEYWORDS.get(val, tokens.Name)), value @@ -962,3 +963,8 @@ def is_keyword(value): 'BREAK': tokens.Keyword, 'LEAVE': tokens.Keyword, } + + +KEYWORDS_MSACCESS = { + 'DISTINCTROW': tokens.Keyword, +} From 4862d68ef15617266c8cc8174beb705fb388ec20 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Wed, 24 Aug 2022 21:05:33 +0200 Subject: [PATCH 27/61] Update Changelog. --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 10185b2b..af30ddb8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,7 +12,7 @@ Enhancements Bug Fixes * Fix spelling of INDICATOR keyword (pr653, by ptld). -* Fix formatting error in EXTRACT function (issue562, pr676, by ecederstrand). +* Fix formatting error in EXTRACT function (issue562, issue670, pr676, by ecederstrand). 
Other From 7de1999a0af9dbbd09f0afb3a50383ad91c22c4c Mon Sep 17 00:00:00 2001 From: Aki Ariga Date: Tue, 3 May 2022 00:33:01 -0700 Subject: [PATCH 28/61] CREATE TABLE tbl AS SELECT should return get_alias() for its column --- sqlparse/engine/grouping.py | 5 ++++- tests/test_grouping.py | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 2fb0a4ce..d250e181 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -341,12 +341,15 @@ def group_aliased(tlist): def group_functions(tlist): has_create = False has_table = False + has_as = False for tmp_token in tlist.tokens: if tmp_token.value == 'CREATE': has_create = True if tmp_token.value == 'TABLE': has_table = True - if has_create and has_table: + if tmp_token.value == 'AS': + has_as = True + if has_create and has_table and not has_as: return tidx, token = tlist.token_next_by(t=T.Name) diff --git a/tests/test_grouping.py b/tests/test_grouping.py index cf629e9c..8c034f9d 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -324,6 +324,11 @@ def test_grouping_alias_case(): assert p.tokens[0].get_alias() == 'foo' +def test_grouping_alias_ctas(): + p = sqlparse.parse('CREATE TABLE tbl1 AS SELECT coalesce(t1.col1, 0) AS col1 FROM t1')[0] + assert p.tokens[10].get_alias() == 'col1' + assert isinstance(p.tokens[10].tokens[0], sql.Function) + def test_grouping_subquery_no_parens(): # Not totally sure if this is the right approach... # When a THEN clause contains a subquery w/o parenthesis around it *and* From a17248666f8bafaacf16016f797c830af4d0102e Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Wed, 24 Aug 2022 21:09:59 +0200 Subject: [PATCH 29/61] Update Changelog. --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index af30ddb8..1ea1dd0e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,6 +8,7 @@ Enhancements * Avoid tokens copy (pr622, by living180). * Add REGEXP as a comparision (pr647, by PeterSandwich). * Add DISTINCTROW keyword for MS Access (issue677). +* Improve parsing of CREATE TABLE AS SELECT (pr662, by chezou). 
Bug Fixes From 403de6fc648ac4e012b60b49161c3f5f36dc944a Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Sun, 19 Sep 2021 11:56:46 +0200 Subject: [PATCH 30/61] Fixed bad parsing of create table statements that use lower case --- sqlparse/engine/grouping.py | 4 ++-- tests/test_grouping.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index d250e181..86d8fc64 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -343,9 +343,9 @@ def group_functions(tlist): has_table = False has_as = False for tmp_token in tlist.tokens: - if tmp_token.value == 'CREATE': + if tmp_token.value.upper() == 'CREATE': has_create = True - if tmp_token.value == 'TABLE': + if tmp_token.value.upper() == 'TABLE': has_table = True if tmp_token.value == 'AS': has_as = True diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 8c034f9d..546ad4b2 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -660,3 +660,7 @@ def test_grouping_as_cte(): assert p[0].get_alias() is None assert p[2].value == 'AS' assert p[4].value == 'WITH' + +def test_grouping_create_table(): + p = sqlparse.parse("create table db.tbl (a string)")[0].tokens + assert p[4].value == "db.tbl" From 893d7b2e8075d6867d8a9b377faba7786f28f66d Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Wed, 24 Aug 2022 21:19:28 +0200 Subject: [PATCH 31/61] Update CHANGELOG. yeah... I promise to make fewer commits like that in the future... --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index 1ea1dd0e..83510b55 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -14,6 +14,7 @@ Bug Fixes * Fix spelling of INDICATOR keyword (pr653, by ptld). * Fix formatting error in EXTRACT function (issue562, issue670, pr676, by ecederstrand). +* Fix bad parsing of create table statements that use lower case (issue217, pr642, by mrmasterplan). Other From c1a597ee6e8e22c94ca5bdfa1c6665453e0d2c25 Mon Sep 17 00:00:00 2001 From: Long Le Xich Date: Fri, 11 Jun 2021 16:17:56 +0800 Subject: [PATCH 32/61] add backtick to remove_quotes character list --- sqlparse/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 299a84cc..512f0385 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -55,7 +55,7 @@ def remove_quotes(val): """Helper that removes surrounding quotes from strings.""" if val is None: return - if val[0] in ('"', "'") and val[0] == val[-1]: + if val[0] in ('"', "'", '`') and val[0] == val[-1]: val = val[1:-1] return val From e2698819c4481ad6fdf04f2caf018ff4b6181e6c Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Wed, 24 Aug 2022 21:40:03 +0200 Subject: [PATCH 33/61] Update Changelog and authors. --- AUTHORS | 1 + CHANGELOG | 1 + 2 files changed, 2 insertions(+) diff --git a/AUTHORS b/AUTHORS index e1e20895..1717adff 100644 --- a/AUTHORS +++ b/AUTHORS @@ -44,6 +44,7 @@ Alphabetical list of contributors: * Kevin Jing Qiu * koljonen * Likai Liu +* Long Le Xich * mathilde.oustlant * Michael Schuller * Mike Amy diff --git a/CHANGELOG b/CHANGELOG index 83510b55..e1b3ae63 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,7 @@ Bug Fixes * Fix spelling of INDICATOR keyword (pr653, by ptld). * Fix formatting error in EXTRACT function (issue562, issue670, pr676, by ecederstrand). * Fix bad parsing of create table statements that use lower case (issue217, pr642, by mrmasterplan). +* Handle backtick as valid quote char (issue628, pr629, by codenamelxl). 
Other From 4235eb89a904216aef9d9ef36a0381be885c87a9 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Wed, 24 Aug 2022 21:46:04 +0200 Subject: [PATCH 34/61] Add tests for utils.remove_quotes. --- tests/test_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..d020f3fa --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,12 @@ +import pytest + +from sqlparse import utils + + +@pytest.mark.parametrize('value, expected', ( + [None, None], + ['\'foo\'', 'foo'], + ['"foo"', 'foo'], + ['`foo`', 'foo'])) +def test_remove_quotes(value, expected): + assert utils.remove_quotes(value) == expected From 07a2e81532daf62f1f4360e48ff322abeade7315 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 6 Sep 2022 21:26:49 +0200 Subject: [PATCH 35/61] Add docstring and comments. --- sqlparse/keywords.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 0f3a4596..3aa6c630 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -11,6 +11,11 @@ def is_keyword(value): + """Checks for a keyword. + + If the given value is in one of the KEYWORDS_* dictionary + it's considered a keyword. Otherwise tokens.Name is returned. + """ val = value.upper() return (KEYWORDS_COMMON.get(val) or KEYWORDS_ORACLE.get(val) @@ -57,7 +62,7 @@ def is_keyword(value): # see issue #39 # Spaces around period `schema . name` are valid identifier # TODO: Spaces before period not implemented - (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name' . + (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name'. # FIXME(atronah): never match, # because `re.match` doesn't work with look-behind regexp feature (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' @@ -92,6 +97,8 @@ def is_keyword(value): (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast), (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison), (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), + # Check for keywords, also returns tokens.Name if regex matches + # but the match isn't a keyword. (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword), (r'[;:()\[\],\.]', tokens.Punctuation), (r'[<>=~!]+', tokens.Operator.Comparison), From b72a8ff42f53cba0517b1dd9e8af051b4a060ecf Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Sat, 10 Sep 2022 10:08:02 +0200 Subject: [PATCH 36/61] Allow any unicode character as identifier name (fixes #641). --- CHANGELOG | 1 + sqlparse/keywords.py | 2 +- tests/test_parse.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index e1b3ae63..a363b226 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,6 +16,7 @@ Bug Fixes * Fix formatting error in EXTRACT function (issue562, issue670, pr676, by ecederstrand). * Fix bad parsing of create table statements that use lower case (issue217, pr642, by mrmasterplan). * Handle backtick as valid quote char (issue628, pr629, by codenamelxl). +* Allow any unicode character as valid identifier name (issue641). Other diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 3aa6c630..d73e1143 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -99,7 +99,7 @@ def is_keyword(value): (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), # Check for keywords, also returns tokens.Name if regex matches # but the match isn't a keyword. 
- (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword), + (r'[0-9_\w][_$#\w]*', is_keyword), (r'[;:()\[\],\.]', tokens.Punctuation), (r'[<>=~!]+', tokens.Operator.Comparison), (r'[+/@#%^&|^-]+', tokens.Operator), diff --git a/tests/test_parse.py b/tests/test_parse.py index caba537e..ec327ac8 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -148,6 +148,7 @@ def test_quoted_identifier(): @pytest.mark.parametrize('name', [ 'foo', '_foo', # issue175 '1_data', # valid MySQL table name, see issue337 + '業者名稱', # valid at least for SQLite3, see issue641 ]) def test_valid_identifier_names(name): t = sqlparse.parse(name)[0].tokens From fba15d34a01739c10596d727e20b89762298eea8 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 23 Sep 2022 20:31:10 +0200 Subject: [PATCH 37/61] Bump version. --- CHANGELOG | 4 ++-- sqlparse/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a363b226..229d9a4d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,5 @@ -Development Version -------------------- +Release 0.4.3 (Sep 23, 2022) +---------------------------- Enhancements diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index 33be0eee..0dd3475e 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.3.dev0' +__version__ = '0.4.3' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] From 9f44d54c07180b826a6276d3acf5e1458b507c3f Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 23 Sep 2022 20:42:55 +0200 Subject: [PATCH 38/61] Switch back to development mode. --- CHANGELOG | 6 ++++++ sqlparse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 229d9a4d..2b00a890 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +Development Version +------------------- + +Nothing yet. + + Release 0.4.3 (Sep 23, 2022) ---------------------------- diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index 0dd3475e..f901185a 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.3' +__version__ = '0.4.4.dev0' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] From e9241945801808d1db7f76bdccbbe9a200042c37 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 10:57:31 +0100 Subject: [PATCH 39/61] Revert "add regex pattern to identify IN as a Compasion token" This reverts commit 28c4d4026e1d9389a99d8cd627c96fa360c17fc4. See #694. The expectation is that IN is primarily recognized as a keyword, although it acts as a comparison operator. This also matches the definition of IN in most SQL syntax references where it is listed as a reserved keyword (PostgreSQL: https://www.postgresql.org/docs/current/sql-keywords-appendix.html, MySQL: https://dev.mysql.com/doc/refman/8.0/en/keywords.html, for example). --- sqlparse/keywords.py | 2 +- tests/test_grouping.py | 12 +----------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index d73e1143..dff5e1cb 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -50,7 +50,7 @@ def is_keyword(value): (r'(? Date: Fri, 30 Dec 2022 11:08:08 +0100 Subject: [PATCH 40/61] Update changelog. 
--- CHANGELOG | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 2b00a890..123ed173 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,12 @@ Development Version ------------------- -Nothing yet. +Bug Fixes + +* Revert a change from 0.4.0 that changed IN to be a comparison (issue694). + The primary expectation is that IN is treated as a keyword and not as a + comparison operator. That also follows the definition of reserved keywords + for the major SQL syntax definitions. Release 0.4.3 (Sep 23, 2022) From bacbeff74bc3d1866246bb1f397f18e64a62c27a Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 11:23:29 +0100 Subject: [PATCH 41/61] Update workflow runner. --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 4f165859..e87b5e42 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -12,7 +12,7 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available strategy: matrix: python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11-dev"] From bf5aff484146ffda3944088c48323ad9272b91fb Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 11:26:49 +0100 Subject: [PATCH 42/61] Update tested Python versions in workflow. --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index e87b5e42..2e07ee10 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available strategy: matrix: - python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11-dev"] + python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] steps: - uses: actions/checkout@v3 From 243da5137c6d21b7b246f884fb07e1f0625f2673 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 11:32:09 +0100 Subject: [PATCH 43/61] Setup a nightly build, even without changes in the module itself. --- .github/workflows/python-app.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 2e07ee10..1a173f81 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -8,6 +8,8 @@ on: branches: [ master ] pull_request: branches: [ master ] + schedule: + cron: '0 12 * * *' jobs: build: From cda0e499a1c762662d2b06b18e7b4aed2da75bc7 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 11:32:54 +0100 Subject: [PATCH 44/61] Fix schedule trigger syntax. --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 1a173f81..fef18a5d 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -9,7 +9,7 @@ on: pull_request: branches: [ master ] schedule: - cron: '0 12 * * *' + - cron: '0 12 * * *' jobs: build: From 8b789f286e1b6cbf05c15020ea7544cb7f02f8f7 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 30 Dec 2022 15:44:37 +0100 Subject: [PATCH 45/61] Switch to pyproject.toml (fixes #685). 
--- .github/workflows/python-app.yml | 9 ++-- CHANGELOG | 4 ++ MANIFEST.in | 11 ----- Makefile | 2 +- pyproject.toml | 70 ++++++++++++++++++++++++++++++++ setup.cfg | 55 ------------------------- setup.py | 12 ------ 7 files changed, 80 insertions(+), 83 deletions(-) delete mode 100644 MANIFEST.in create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index fef18a5d..906ca7e8 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -5,7 +5,8 @@ name: Python application on: push: - branches: [ master ] + branches: + - master pull_request: branches: [ master ] schedule: @@ -27,9 +28,9 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install codecov flake8 pytest pytest-cov - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + python -m pip install --upgrade pip flit + flit install --deps=develop + pip install codecov - name: Lint with flake8 run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics - name: Test with pytest diff --git a/CHANGELOG b/CHANGELOG index 123ed173..94864138 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,6 +8,10 @@ Bug Fixes comparison operator. That also follows the definition of reserved keywords for the major SQL syntax definitions. +Other + +* sqlparse now uses pyproject.toml instead of setup.cfg (issue685). + Release 0.4.3 (Sep 23, 2022) ---------------------------- diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 8043b359..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,11 +0,0 @@ -recursive-include docs source/* -include docs/sqlformat.1 -include docs/Makefile -recursive-include tests *.py *.sql -include LICENSE -include TODO -include AUTHORS -include CHANGELOG -include Makefile -include setup.cfg -include tox.ini diff --git a/Makefile b/Makefile index ee35e546..1657822e 100644 --- a/Makefile +++ b/Makefile @@ -22,5 +22,5 @@ clean: release: @rm -rf dist/ - python setup.py sdist bdist_wheel + python -m build twine upload --sign --identity E0B84F81 dist/* diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..338a53ce --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,70 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "sqlparse" +description = "A non-validating SQL parser." 
+authors = [{name = "Andi Albrecht", email = "albrecht.andi@gmail.com"}] +readme = "README.rst" +dynamic = ["version"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Topic :: Database", + "Topic :: Software Development", +] +requires-python = ">=3.5" + +[project.urls] +Home = "https://github.com/andialbrecht/sqlparse" +Documentation = "https://sqlparse.readthedocs.io/" +"Release Notes" = "https://sqlparse.readthedocs.io/en/latest/changes/" +Source = "https://github.com/andialbrecht/sqlparse" +Tracker = "https://github.com/andialbrecht/sqlparse/issues" + +[project.scripts] +sqlformat = "sqlparse.__main__:main" + +[project.optional-dependencies] +dev = [ + "flake8", + "build", +] +test = [ + "pytest", + "pytest-cov", +] +doc = [ + "sphinx", +] + +[tool.flit.sdist] +include = [ + "docs/source/", + "docs/sqlformat.1", + "docs/Makefile", + "tests/*.py", "tests/files/*.sql", + "LICENSE", + "TODO", + "AUTHORS", + "CHANGELOG", + "Makefile", + "tox.ini", +] + +[tool.coverage.run] +omit = ["sqlparse/__main__.py"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 0843b704..00000000 --- a/setup.cfg +++ /dev/null @@ -1,55 +0,0 @@ -[metadata] -name = sqlparse -version = attr: sqlparse.__version__ -url = https://github.com/andialbrecht/sqlparse -author = Andi Albrecht -author_email = albrecht.andi@gmail.com -description = A non-validating SQL parser. 
-long_description = file: README.rst -license = BSD-3-Clause -classifiers = - Development Status :: 5 - Production/Stable - Intended Audience :: Developers - License :: OSI Approved :: BSD License - Operating System :: OS Independent - Programming Language :: Python - Programming Language :: Python :: 3 - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.5 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - Topic :: Database - Topic :: Software Development -project_urls = - Documentation = https://sqlparse.readthedocs.io/ - Release Notes = https://sqlparse.readthedocs.io/en/latest/changes/ - Source = https://github.com/andialbrecht/sqlparse - Tracker = https://github.com/andialbrecht/sqlparse/issues - -[options] -python_requires = >=3.5 -packages = find: - -[options.packages.find] -exclude = tests - -[options.entry_points] -console_scripts = - sqlformat = sqlparse.__main__:main - -[tool:pytest] -xfail_strict = True - -[flake8] -extend-ignore = - E731 - -[coverage:run] -branch = False -omit = - sqlparse/__main__.py diff --git a/setup.py b/setup.py deleted file mode 100644 index ede0aff8..00000000 --- a/setup.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (C) 2009-2020 the sqlparse authors and contributors -# -# -# This setup script is part of python-sqlparse and is released under -# the BSD License: https://opensource.org/licenses/BSD-3-Clause - -from setuptools import setup - - -setup() From 9a1cb5dddd1545c30b1e3a2c6f5d3514d079d93e Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 30 Nov 2022 14:51:58 +0000 Subject: [PATCH 46/61] configurable syntax --- sqlparse/keywords.py | 22 ++++------- sqlparse/lexer.py | 87 ++++++++++++++++++++++++++++++++++-------- tests/test_keywords.py | 3 +- 3 files changed, 82 insertions(+), 30 deletions(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index dff5e1cb..ce537812 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -6,23 +6,17 @@ # the BSD License: https://opensource.org/licenses/BSD-3-Clause import re +from typing import Dict, List, Tuple, Callable, Union from sqlparse import tokens +# object() only supports "is" and is useful as a marker +PROCESS_AS_KEYWORD = object() -def is_keyword(value): - """Checks for a keyword. - - If the given value is in one of the KEYWORDS_* dictionary - it's considered a keyword. Otherwise tokens.Name is returned. - """ - val = value.upper() - return (KEYWORDS_COMMON.get(val) - or KEYWORDS_ORACLE.get(val) - or KEYWORDS_PLPGSQL.get(val) - or KEYWORDS_HQL.get(val) - or KEYWORDS_MSACCESS.get(val) - or KEYWORDS.get(val, tokens.Name)), value +SQL_REGEX_TYPE = List[ + Tuple[Callable, Union[type(PROCESS_AS_KEYWORD), tokens._TokenType]] +] +KEYWORDS_TYPE = Dict[str, tokens._TokenType] SQL_REGEX = { @@ -99,7 +93,7 @@ def is_keyword(value): (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), # Check for keywords, also returns tokens.Name if regex matches # but the match isn't a keyword. 
- (r'[0-9_\w][_$#\w]*', is_keyword), + (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD), (r'[;:()\[\],\.]', tokens.Punctuation), (r'[<>=~!]+', tokens.Operator.Comparison), (r'[+/@#%^&|^-]+', tokens.Operator), diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 4397f185..61c52a97 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -13,19 +13,74 @@ # and to allow some customizations. from io import TextIOBase +from typing import List -from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX +from sqlparse import tokens, keywords from sqlparse.utils import consume -class Lexer: - """Lexer - Empty class. Leaving for backwards-compatibility - """ +class _LexerSingletonMetaclass(type): + _lexer_instance = None + + def __call__(cls, *args, **kwargs): + if _LexerSingletonMetaclass._lexer_instance is None: + _LexerSingletonMetaclass._lexer_instance = super( + _LexerSingletonMetaclass, cls + ).__call__(*args, **kwargs) + return _LexerSingletonMetaclass._lexer_instance + + +class Lexer(metaclass=_LexerSingletonMetaclass): + """The Lexer supports configurable syntax. + To add support for additional keywords, use the `add_keywords` method.""" + + _SQL_REGEX: keywords.SQL_REGEX_TYPE + _keywords: List[keywords.KEYWORDS_TYPE] + + def default_initialization(self): + """Initialize the lexer with default dictionaries. + Useful if you need to revert custom syntax settings.""" + self.clear() + self.set_SQL_REGEX(keywords.SQL_REGEX) + self.add_keywords(keywords.KEYWORDS_COMMON) + self.add_keywords(keywords.KEYWORDS_ORACLE) + self.add_keywords(keywords.KEYWORDS_PLPGSQL) + self.add_keywords(keywords.KEYWORDS_HQL) + self.add_keywords(keywords.KEYWORDS_MSACCESS) + self.add_keywords(keywords.KEYWORDS) + + def __init__(self): + self.default_initialization() + + def clear(self): + """Clear all syntax configurations. + Useful if you want to load a reduced set of syntax configurations.""" + self._SQL_REGEX = [] + self._keywords = [] + + def set_SQL_REGEX(self, SQL_REGEX: keywords.SQL_REGEX_TYPE): + """Set the list of regex that will parse the SQL.""" + self._SQL_REGEX = SQL_REGEX + + def add_keywords(self, keywords: keywords.KEYWORDS_TYPE): + """Add keyword dictionaries. Keywords are looked up in the same order + that dictionaries were added.""" + self._keywords.append(keywords) + + def is_keyword(self, value): + """Checks for a keyword. + + If the given value is in one of the KEYWORDS_* dictionary + it's considered a keyword. Otherwise tokens.Name is returned. + """ + val = value.upper() + for kwdict in self._keywords: + if val in kwdict: + return kwdict[val], value + else: + return tokens.Name, value - @staticmethod - def get_tokens(text, encoding=None): + def get_tokens(self, text, encoding=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism @@ -48,24 +103,26 @@ def get_tokens(text, encoding=None): text = text.decode(encoding) else: try: - text = text.decode('utf-8') + text = text.decode("utf-8") except UnicodeDecodeError: - text = text.decode('unicode-escape') + text = text.decode("unicode-escape") else: - raise TypeError("Expected text or file-like object, got {!r}". 
- format(type(text))) + raise TypeError( + "Expected text or file-like object, got {!r}" + .format(type(text)) + ) iterable = enumerate(text) for pos, char in iterable: - for rexmatch, action in SQL_REGEX: + for rexmatch, action in self._SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group() - elif callable(action): - yield action(m.group()) + elif action is keywords.PROCESS_AS_KEYWORD: + yield self.is_keyword(m.group()) consume(iterable, m.end() - pos - 1) break diff --git a/tests/test_keywords.py b/tests/test_keywords.py index d4ded4b6..a3b1b385 100644 --- a/tests/test_keywords.py +++ b/tests/test_keywords.py @@ -2,6 +2,7 @@ from sqlparse import tokens from sqlparse.keywords import SQL_REGEX +from sqlparse.lexer import Lexer class TestSQLREGEX: @@ -9,5 +10,5 @@ class TestSQLREGEX: '1.', '-1.', '.1', '-.1']) def test_float_numbers(self, number): - ttype = next(tt for action, tt in SQL_REGEX if action(number)) + ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number)) assert tokens.Number.Float == ttype From e37eaea4a78cbb335070ffec018bfc28425aa1a4 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 30 Nov 2022 14:52:13 +0000 Subject: [PATCH 47/61] test configurable syntax --- tests/test_parse.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/test_parse.py b/tests/test_parse.py index ec327ac8..c5dfd369 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -5,6 +5,7 @@ import sqlparse from sqlparse import sql, tokens as T +from sqlparse.lexer import Lexer def test_parse_tokenize(): @@ -489,3 +490,45 @@ def test_parenthesis(): T.Newline, T.Newline, T.Punctuation] + + +def test_configurable_syntax(): + sql = """select * from foo BACON SPAM EGGS;""" + # sql="""select * from mydb.mytable BACON SPAM EGGS;""" + tokens = sqlparse.parse(sql)[0] + + assert list( + (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo BACON"), + (None, "SPAM EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + + Lexer().add_keywords( + { + "BACON": sqlparse.tokens.Name.Builtin, + "SPAM": sqlparse.tokens.Keyword, + "EGGS": sqlparse.tokens.Keyword, + } + ) + + tokens = sqlparse.parse(sql)[0] + + assert list( + (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo"), + (sqlparse.tokens.Name.Builtin, "BACON"), + (sqlparse.tokens.Keyword, "SPAM"), + (sqlparse.tokens.Keyword, "EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + # reset the syntax for later tests. 
+ Lexer().default_initialization() From 8515d2edd70fc16d69aa7b1094f9b3534dfa74d9 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 30 Nov 2022 15:00:03 +0000 Subject: [PATCH 48/61] remove type annotations for python 3.5 compatibility --- sqlparse/keywords.py | 6 ------ sqlparse/lexer.py | 8 ++------ tests/test_parse.py | 1 - 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index ce537812..6bc7937a 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -6,18 +6,12 @@ # the BSD License: https://opensource.org/licenses/BSD-3-Clause import re -from typing import Dict, List, Tuple, Callable, Union from sqlparse import tokens # object() only supports "is" and is useful as a marker PROCESS_AS_KEYWORD = object() -SQL_REGEX_TYPE = List[ - Tuple[Callable, Union[type(PROCESS_AS_KEYWORD), tokens._TokenType]] -] -KEYWORDS_TYPE = Dict[str, tokens._TokenType] - SQL_REGEX = { 'root': [ diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 61c52a97..7408e01a 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -13,7 +13,6 @@ # and to allow some customizations. from io import TextIOBase -from typing import List from sqlparse import tokens, keywords from sqlparse.utils import consume @@ -34,9 +33,6 @@ class Lexer(metaclass=_LexerSingletonMetaclass): """The Lexer supports configurable syntax. To add support for additional keywords, use the `add_keywords` method.""" - _SQL_REGEX: keywords.SQL_REGEX_TYPE - _keywords: List[keywords.KEYWORDS_TYPE] - def default_initialization(self): """Initialize the lexer with default dictionaries. Useful if you need to revert custom syntax settings.""" @@ -58,11 +54,11 @@ def clear(self): self._SQL_REGEX = [] self._keywords = [] - def set_SQL_REGEX(self, SQL_REGEX: keywords.SQL_REGEX_TYPE): + def set_SQL_REGEX(self, SQL_REGEX): """Set the list of regex that will parse the SQL.""" self._SQL_REGEX = SQL_REGEX - def add_keywords(self, keywords: keywords.KEYWORDS_TYPE): + def add_keywords(self, keywords): """Add keyword dictionaries. Keywords are looked up in the same order that dictionaries were added.""" self._keywords.append(keywords) diff --git a/tests/test_parse.py b/tests/test_parse.py index c5dfd369..3018d9ad 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -494,7 +494,6 @@ def test_parenthesis(): def test_configurable_syntax(): sql = """select * from foo BACON SPAM EGGS;""" - # sql="""select * from mydb.mytable BACON SPAM EGGS;""" tokens = sqlparse.parse(sql)[0] assert list( From f9a73a62cfc23b10c38f22a10bd1d4c3edbb286f Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Wed, 30 Nov 2022 22:34:52 +0000 Subject: [PATCH 49/61] test for changing the regex --- sqlparse/lexer.py | 10 ++++------ tests/test_parse.py | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 7408e01a..aafb55f2 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -99,14 +99,12 @@ def get_tokens(self, text, encoding=None): text = text.decode(encoding) else: try: - text = text.decode("utf-8") + text = text.decode('utf-8') except UnicodeDecodeError: - text = text.decode("unicode-escape") + text = text.decode('unicode-escape') else: - raise TypeError( - "Expected text or file-like object, got {!r}" - .format(type(text)) - ) + raise TypeError("Expected text or file-like object, got {!r}". 
+ format(type(text))) iterable = enumerate(text) for pos, char in iterable: diff --git a/tests/test_parse.py b/tests/test_parse.py index 3018d9ad..3ac65001 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,10 +1,11 @@ """Tests sqlparse.parse().""" +import re from io import StringIO import pytest import sqlparse -from sqlparse import sql, tokens as T +from sqlparse import sql, tokens as T, keywords from sqlparse.lexer import Lexer @@ -491,8 +492,7 @@ def test_parenthesis(): T.Newline, T.Punctuation] - -def test_configurable_syntax(): +def test_configurable_keywords(): sql = """select * from foo BACON SPAM EGGS;""" tokens = sqlparse.parse(sql)[0] @@ -517,6 +517,9 @@ def test_configurable_syntax(): tokens = sqlparse.parse(sql)[0] + # reset the syntax for later tests. + Lexer().default_initialization() + assert list( (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace ) == [ @@ -529,5 +532,30 @@ def test_configurable_syntax(): (sqlparse.tokens.Keyword, "EGGS"), (sqlparse.tokens.Punctuation, ";"), ] + + +def test_configurable_regex(): + lex = Lexer() + lex.clear() + + my_regex = ( + re.compile(r"ZORDER\s+BY\b", keywords.FLAGS).match, + sqlparse.tokens.Keyword, + ) + + lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:]) + lex.add_keywords(keywords.KEYWORDS_COMMON) + lex.add_keywords(keywords.KEYWORDS_ORACLE) + lex.add_keywords(keywords.KEYWORDS_PLPGSQL) + lex.add_keywords(keywords.KEYWORDS_HQL) + lex.add_keywords(keywords.KEYWORDS_MSACCESS) + lex.add_keywords(keywords.KEYWORDS) + + tokens = sqlparse.parse("select * from foo zorder by bar;")[0] + # reset the syntax for later tests. Lexer().default_initialization() + + assert list( + (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + )[4] == (sqlparse.tokens.Keyword, "zorder by") From e0d3928ba69d73ba874ca03ec4395e94cf1ab293 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Thu, 1 Dec 2022 10:35:42 +0000 Subject: [PATCH 50/61] lexer documentation --- docs/source/extending.rst | 66 +++++++++++++++ docs/source/index.rst | 1 + sqlparse/keywords.py | 164 +++++++++++++++++++------------------- sqlparse/lexer.py | 5 +- tests/test_parse.py | 6 +- 5 files changed, 151 insertions(+), 91 deletions(-) create mode 100644 docs/source/extending.rst diff --git a/docs/source/extending.rst b/docs/source/extending.rst new file mode 100644 index 00000000..f1bd5512 --- /dev/null +++ b/docs/source/extending.rst @@ -0,0 +1,66 @@ +Extending :mod:`sqlparse` +========================= + +.. module:: sqlparse + :synopsis: Extending parsing capability of sqlparse. + +The :mod:`sqlparse` module uses a sql grammar that was tuned through usage and numerous +PR to fit a broad range of SQL syntaxes, but it cannot cater to every given case since +some SQL dialects have adopted conflicting meanings of certain keywords. Sqlparse +therefore exposes a mechanism to configure the fundamental keywords and regular +expressions that parse the language as described below. + +If you find an adaptation that works for your specific use-case. Please consider +contributing it back to the community by opening a PR on +`GitHub `_. + +Configuring the Lexer +--------------------- + +The lexer is a singleton class that breaks down the stream of characters into language +tokens. It does this by using a sequence of regular expressions and keywords that are +listed in the file ``sqlparse.keywords``. 
Instead of applying these fixed grammar +definitions directly, the lexer is default initialized in its method called +``default_initialization()``. As an api user, you can adapt the Lexer configuration by +applying your own configuration logic. To do so, start out by clearing previous +configurations with ``.clear()``, then apply the SQL list with +``.set_SQL_REGEX(SQL_REGEX)``, and apply keyword lists with ``.add_keywords(KEYWORDS)``. + +You can do so by re-using the expressions in ``sqlparse.keywords`` (see example below), +leaving parts out, or by making up your own master list. + +See the expected types of the arguments by inspecting their structure in +``sqlparse.keywords``. +(For compatibility with python 3.4, this library does not use type-hints.) + +The following example adds support for the expression ``ZORDER BY``, and adds ``BAR`` as +a keyword to the lexer: + +.. code-block:: python + + import re + + import sqlparse + from sqlparse import keywords + from sqlparse.lexer import Lexer + + lex = Lexer() + lex.clear() + + my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) + + # slice the default SQL_REGEX to inject the custom object + lex.set_SQL_REGEX( + keywords.SQL_REGEX[:38] + + [my_regex] + + keywords.SQL_REGEX[38:] + ) + lex.add_keywords(keywords.KEYWORDS_COMMON) + lex.add_keywords(keywords.KEYWORDS_ORACLE) + lex.add_keywords(keywords.KEYWORDS_PLPGSQL) + lex.add_keywords(keywords.KEYWORDS_HQL) + lex.add_keywords(keywords.KEYWORDS_MSACCESS) + lex.add_keywords(keywords.KEYWORDS) + lex.add_keywords({'BAR', sqlparse.tokens.Keyword}) + + sqlparse.parse("select * from foo zorder by bar;") diff --git a/docs/source/index.rst b/docs/source/index.rst index cba33141..e18d2b3c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,6 +20,7 @@ Contents api analyzing ui + extending changes license indices diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 6bc7937a..f04f928e 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -5,96 +5,92 @@ # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause -import re - from sqlparse import tokens # object() only supports "is" and is useful as a marker +# use this marker to specify that the given regex in SQL_REGEX +# shall be processed further through a lookup in the KEYWORDS dictionaries PROCESS_AS_KEYWORD = object() -SQL_REGEX = { - 'root': [ - (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), - (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), - - (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), - (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), - - (r'(\r\n|\r|\n)', tokens.Newline), - (r'\s+?', tokens.Whitespace), - - (r':=', tokens.Assignment), - (r'::', tokens.Punctuation), - - (r'\*', tokens.Wildcard), - - (r"`(``|[^`])*`", tokens.Name), - (r"´(´´|[^´])*´", tokens.Name), - (r'((?=~!]+', tokens.Operator.Comparison), - (r'[+/@#%^&|^-]+', tokens.Operator), - ]} - -FLAGS = re.IGNORECASE | re.UNICODE -SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX['root']] +SQL_REGEX = [ + (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), + (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), + + (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), + (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), + + (r'(\r\n|\r|\n)', tokens.Newline), + (r'\s+?', tokens.Whitespace), + + (r':=', tokens.Assignment), + (r'::', tokens.Punctuation), + + (r'\*', tokens.Wildcard), + + (r"`(``|[^`])*`", tokens.Name), + 
(r"´(´´|[^´])*´", tokens.Name), + (r'((?=~!]+', tokens.Operator.Comparison), + (r'[+/@#%^&|^-]+', tokens.Operator), +] KEYWORDS = { 'ABORT': tokens.Keyword, diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index aafb55f2..50799df6 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -6,7 +6,7 @@ # the BSD License: https://opensource.org/licenses/BSD-3-Clause """SQL Lexer""" - +import re # This code is based on the SqlLexer in pygments. # http://pygments.org/ # It's separated from the rest of pygments to increase performance @@ -56,7 +56,8 @@ def clear(self): def set_SQL_REGEX(self, SQL_REGEX): """Set the list of regex that will parse the SQL.""" - self._SQL_REGEX = SQL_REGEX + FLAGS = re.IGNORECASE | re.UNICODE + self._SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX] def add_keywords(self, keywords): """Add keyword dictionaries. Keywords are looked up in the same order diff --git a/tests/test_parse.py b/tests/test_parse.py index 3ac65001..017f93ae 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,5 +1,4 @@ """Tests sqlparse.parse().""" -import re from io import StringIO import pytest @@ -538,10 +537,7 @@ def test_configurable_regex(): lex = Lexer() lex.clear() - my_regex = ( - re.compile(r"ZORDER\s+BY\b", keywords.FLAGS).match, - sqlparse.tokens.Keyword, - ) + my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:]) lex.add_keywords(keywords.KEYWORDS_COMMON) From 4efdc036623e1586206d7132abf95696953deb9a Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Thu, 1 Dec 2022 10:42:44 +0000 Subject: [PATCH 51/61] flake8 --- sqlparse/lexer.py | 5 ++++- tests/test_keywords.py | 1 - tests/test_parse.py | 19 +++++++++++++++---- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 50799df6..657177cb 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -57,7 +57,10 @@ def clear(self): def set_SQL_REGEX(self, SQL_REGEX): """Set the list of regex that will parse the SQL.""" FLAGS = re.IGNORECASE | re.UNICODE - self._SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX] + self._SQL_REGEX = [ + (re.compile(rx, FLAGS).match, tt) + for rx, tt in SQL_REGEX + ] def add_keywords(self, keywords): """Add keyword dictionaries. 
Keywords are looked up in the same order diff --git a/tests/test_keywords.py b/tests/test_keywords.py index a3b1b385..2eddccce 100644 --- a/tests/test_keywords.py +++ b/tests/test_keywords.py @@ -1,7 +1,6 @@ import pytest from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX from sqlparse.lexer import Lexer diff --git a/tests/test_parse.py b/tests/test_parse.py index 017f93ae..33e8541f 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -491,12 +491,15 @@ def test_parenthesis(): T.Newline, T.Punctuation] + def test_configurable_keywords(): sql = """select * from foo BACON SPAM EGGS;""" tokens = sqlparse.parse(sql)[0] assert list( - (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace ) == [ (sqlparse.tokens.Keyword.DML, "select"), (sqlparse.tokens.Wildcard, "*"), @@ -520,7 +523,9 @@ def test_configurable_keywords(): Lexer().default_initialization() assert list( - (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace ) == [ (sqlparse.tokens.Keyword.DML, "select"), (sqlparse.tokens.Wildcard, "*"), @@ -539,7 +544,11 @@ def test_configurable_regex(): my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) - lex.set_SQL_REGEX(keywords.SQL_REGEX[:38] + [my_regex] + keywords.SQL_REGEX[38:]) + lex.set_SQL_REGEX( + keywords.SQL_REGEX[:38] + + [my_regex] + + keywords.SQL_REGEX[38:] + ) lex.add_keywords(keywords.KEYWORDS_COMMON) lex.add_keywords(keywords.KEYWORDS_ORACLE) lex.add_keywords(keywords.KEYWORDS_PLPGSQL) @@ -553,5 +562,7 @@ def test_configurable_regex(): Lexer().default_initialization() assert list( - (t.ttype, t.value) for t in tokens if t.ttype not in sqlparse.tokens.Whitespace + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace )[4] == (sqlparse.tokens.Keyword, "zorder by") From fbf9a576fe40ad8e4d51bb922bb454c317f73403 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Sun, 1 Jan 2023 14:20:52 +0000 Subject: [PATCH 52/61] additional documentation --- docs/source/extending.rst | 10 ++++++++++ sqlparse/lexer.py | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/source/extending.rst b/docs/source/extending.rst index f1bd5512..97b7d389 100644 --- a/docs/source/extending.rst +++ b/docs/source/extending.rst @@ -44,7 +44,12 @@ a keyword to the lexer: from sqlparse import keywords from sqlparse.lexer import Lexer + # get the lexer singleton object to configure it lex = Lexer() + + # Clear the default configurations. + # After this call, reg-exps and keyword dictionaries need to be loaded + # to make the lexer functional again. lex.clear() my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) @@ -55,12 +60,17 @@ a keyword to the lexer: + [my_regex] + keywords.SQL_REGEX[38:] ) + + # add the default keyword dictionaries lex.add_keywords(keywords.KEYWORDS_COMMON) lex.add_keywords(keywords.KEYWORDS_ORACLE) lex.add_keywords(keywords.KEYWORDS_PLPGSQL) lex.add_keywords(keywords.KEYWORDS_HQL) lex.add_keywords(keywords.KEYWORDS_MSACCESS) lex.add_keywords(keywords.KEYWORDS) + + # add a custom keyword dictionary lex.add_keywords({'BAR', sqlparse.tokens.Keyword}) + # no configuration is passed here. The lexer is used as a singleton. 
sqlparse.parse("select * from foo zorder by bar;") diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 657177cb..6e17fca2 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -50,7 +50,9 @@ def __init__(self): def clear(self): """Clear all syntax configurations. - Useful if you want to load a reduced set of syntax configurations.""" + Useful if you want to load a reduced set of syntax configurations. + After this call, reg-exps and keyword dictionaries need to be loaded + to make the lexer functional again.""" self._SQL_REGEX = [] self._keywords = [] From 907fb496f90f2719095a1f01fe24db1e5c0e15a8 Mon Sep 17 00:00:00 2001 From: Simon Heisterkamp Date: Sun, 1 Jan 2023 20:59:40 +0000 Subject: [PATCH 53/61] change singleton behavior --- docs/source/extending.rst | 2 +- sqlparse/lexer.py | 52 +++++++++++++++++++++++++-------------- tests/test_keywords.py | 2 +- tests/test_parse.py | 8 +++--- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/docs/source/extending.rst b/docs/source/extending.rst index 97b7d389..0c10924b 100644 --- a/docs/source/extending.rst +++ b/docs/source/extending.rst @@ -45,7 +45,7 @@ a keyword to the lexer: from sqlparse.lexer import Lexer # get the lexer singleton object to configure it - lex = Lexer() + lex = Lexer.get_default_instance() # Clear the default configurations. # After this call, reg-exps and keyword dictionaries need to be loaded diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index 6e17fca2..9d25c9e6 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -7,6 +7,7 @@ """SQL Lexer""" import re + # This code is based on the SqlLexer in pygments. # http://pygments.org/ # It's separated from the rest of pygments to increase performance @@ -18,21 +19,39 @@ from sqlparse.utils import consume -class _LexerSingletonMetaclass(type): - _lexer_instance = None - - def __call__(cls, *args, **kwargs): - if _LexerSingletonMetaclass._lexer_instance is None: - _LexerSingletonMetaclass._lexer_instance = super( - _LexerSingletonMetaclass, cls - ).__call__(*args, **kwargs) - return _LexerSingletonMetaclass._lexer_instance - - -class Lexer(metaclass=_LexerSingletonMetaclass): +class Lexer: """The Lexer supports configurable syntax. To add support for additional keywords, use the `add_keywords` method.""" + _default_intance = None + + # Development notes: + # - This class is prepared to be able to support additional SQL dialects + # in the future by adding additional functions that take the place of + # the function default_initialization() + # - The lexer class uses an explicit singleton behavior with the + # instance-getter method get_default_instance(). This mechanism has + # the advantage that the call signature of the entry-points to the + # sqlparse library are not affected. Also, usage of sqlparse in third + # party code does not need to be adapted. On the other hand, singleton + # behavior is not thread safe, and the current implementation does not + # easily allow for multiple SQL dialects to be parsed in the same + # process. Such behavior can be supported in the future by passing a + # suitably initialized lexer object as an additional parameter to the + # entry-point functions (such as `parse`). Code will need to be written + # to pass down and utilize such an object. The current implementation + # is prepared to support this thread safe approach without the + # default_instance part needing to change interface. 
+ + @classmethod + def get_default_instance(cls): + """Returns the lexer instance used internally + by the sqlparse core functions.""" + if cls._default_intance is None: + cls._default_intance = cls() + cls._default_intance.default_initialization() + return cls._default_intance + def default_initialization(self): """Initialize the lexer with default dictionaries. Useful if you need to revert custom syntax settings.""" @@ -45,13 +64,10 @@ def default_initialization(self): self.add_keywords(keywords.KEYWORDS_MSACCESS) self.add_keywords(keywords.KEYWORDS) - def __init__(self): - self.default_initialization() - def clear(self): """Clear all syntax configurations. Useful if you want to load a reduced set of syntax configurations. - After this call, reg-exps and keyword dictionaries need to be loaded + After this call, regexps and keyword dictionaries need to be loaded to make the lexer functional again.""" self._SQL_REGEX = [] self._keywords = [] @@ -73,7 +89,7 @@ def is_keyword(self, value): """Checks for a keyword. If the given value is in one of the KEYWORDS_* dictionary - it's considered a keyword. Otherwise tokens.Name is returned. + it's considered a keyword. Otherwise, tokens.Name is returned. """ val = value.upper() for kwdict in self._keywords: @@ -136,4 +152,4 @@ def tokenize(sql, encoding=None): Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream of ``(token type, value)`` items. """ - return Lexer().get_tokens(sql, encoding) + return Lexer.get_default_instance().get_tokens(sql, encoding) diff --git a/tests/test_keywords.py b/tests/test_keywords.py index 2eddccce..b26e9b45 100644 --- a/tests/test_keywords.py +++ b/tests/test_keywords.py @@ -9,5 +9,5 @@ class TestSQLREGEX: '1.', '-1.', '.1', '-.1']) def test_float_numbers(self, number): - ttype = next(tt for action, tt in Lexer()._SQL_REGEX if action(number)) + ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number)) assert tokens.Number.Float == ttype diff --git a/tests/test_parse.py b/tests/test_parse.py index 33e8541f..5feef5a7 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -509,7 +509,7 @@ def test_configurable_keywords(): (sqlparse.tokens.Punctuation, ";"), ] - Lexer().add_keywords( + Lexer.get_default_instance().add_keywords( { "BACON": sqlparse.tokens.Name.Builtin, "SPAM": sqlparse.tokens.Keyword, @@ -520,7 +520,7 @@ def test_configurable_keywords(): tokens = sqlparse.parse(sql)[0] # reset the syntax for later tests. - Lexer().default_initialization() + Lexer.get_default_instance().default_initialization() assert list( (t.ttype, t.value) @@ -539,7 +539,7 @@ def test_configurable_keywords(): def test_configurable_regex(): - lex = Lexer() + lex = Lexer.get_default_instance() lex.clear() my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) @@ -559,7 +559,7 @@ def test_configurable_regex(): tokens = sqlparse.parse("select * from foo zorder by bar;")[0] # reset the syntax for later tests. 
- Lexer().default_initialization() + Lexer.get_default_instance().default_initialization() assert list( (t.ttype, t.value) From dd9d5b91d7aa30e4a000d5370f09dc99378891dc Mon Sep 17 00:00:00 2001 From: Shikanime Deva Date: Mon, 19 Jul 2021 13:56:30 +0200 Subject: [PATCH 54/61] Fix get_type with comments between WITH keyword --- sqlparse/sql.py | 27 ++++++++++++++------------- tests/test_regressions.py | 9 +++++++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 586cd216..1ccfbdbe 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -413,27 +413,28 @@ def get_type(self): Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_first(skip_cm=True) - if first_token is None: + token = self.token_first(skip_cm=True) + if token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. return 'UNKNOWN' - elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): - return first_token.normalized + elif token.ttype in (T.Keyword.DML, T.Keyword.DDL): + return token.normalized - elif first_token.ttype == T.Keyword.CTE: + elif token.ttype == T.Keyword.CTE: # The WITH keyword should be followed by either an Identifier or # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. - fidx = self.token_index(first_token) - tidx, token = self.token_next(fidx, skip_ws=True) - if isinstance(token, (Identifier, IdentifierList)): - _, dml_keyword = self.token_next(tidx, skip_ws=True) - - if dml_keyword is not None \ - and dml_keyword.ttype == T.Keyword.DML: - return dml_keyword.normalized + tidx = self.token_index(token) + while tidx is not None: + tidx, token = self.token_next(tidx, skip_ws=True) + if isinstance(token, (Identifier, IdentifierList)): + tidx, token = self.token_next(tidx, skip_ws=True) + + if token is not None \ + and token.ttype == T.Keyword.DML: + return token.normalized # Hmm, probably invalid syntax, so return unknown. return 'UNKNOWN' diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 4ffc69f3..bc8b7dd3 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -427,3 +427,12 @@ def test_splitting_at_and_backticks_issue588(): 'grant foo to user1@`myhost`; grant bar to user1@`myhost`;') assert len(splitted) == 2 assert splitted[-1] == 'grant bar to user1@`myhost`;' + + +def test_comment_between_cte_clauses_issue632(): + p, = sqlparse.parse(""" + WITH foo AS (), + -- A comment before baz subquery + baz AS () + SELECT * FROM baz;""") + assert p.get_type() == "SELECT" From fc76056fb8f0ec713a3f2a2b6206a3336932c382 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 20 Mar 2023 08:46:10 +0100 Subject: [PATCH 55/61] Cleanup regex for detecting keywords (fixes #709). --- sqlparse/keywords.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index f04f928e..f85d4688 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -86,7 +86,7 @@ (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), # Check for keywords, also returns tokens.Name if regex matches # but the match isn't a keyword. 
- (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD), + (r'\w[$#\w]*', PROCESS_AS_KEYWORD), (r'[;:()\[\],\.]', tokens.Punctuation), (r'[<>=~!]+', tokens.Operator.Comparison), (r'[+/@#%^&|^-]+', tokens.Operator), From b949fdf9a1538f98b57612bef6306fc38f32aaf7 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Fri, 14 Apr 2023 14:51:58 +0200 Subject: [PATCH 56/61] CI: Use codecov action. codecov module is deprecated and was removed from PyPI in favor of the github action. --- .github/workflows/python-app.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 906ca7e8..3033af97 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -30,10 +30,9 @@ jobs: run: | python -m pip install --upgrade pip flit flit install --deps=develop - pip install codecov - name: Lint with flake8 run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics - name: Test with pytest run: pytest --cov=sqlparse - name: Publish to codecov - run: codecov + uses: codecov/codecov-action@v3 From c457abd5f097dd13fb21543381e7cfafe7d31cfb Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Mon, 20 Mar 2023 08:33:46 +0100 Subject: [PATCH 57/61] Remove unnecessary parts in regex for bad escaping. The regex tried to deal with situations where escaping in the SQL to be parsed was suspicious. --- CHANGELOG | 10 ++++++++++ sqlparse/keywords.py | 4 ++-- tests/test_split.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 94864138..880a9ca9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,12 +1,22 @@ Development Version ------------------- +Notable Changes + +* IMPORTANT: This release fixes a security vulnerability in the + parser where a regular expression vulnerable to ReDOS (Regular + Expression Denial of Service) was used. See the security advisory + for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-rrm6-wvj7-cwh2 + The vulnerability was discovered by @erik-krogh from GitHub + Security Lab (GHSL). Thanks for reporting! + Bug Fixes * Revert a change from 0.4.0 that changed IN to be a comparison (issue694). The primary expectation is that IN is treated as a keyword and not as a comparison operator. That also follows the definition of reserved keywords for the major SQL syntax definitions. +* Fix regular expressions for string parsing. Other diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index f85d4688..b45f3e0f 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -59,9 +59,9 @@ (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float), (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), - (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), + (r"'(''|\\'|[^'])*'", tokens.String.Single), # not a real string literal in ANSI SQL: - (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), + (r'"(""|\\"|[^"])*"', tokens.String.Symbol), (r'(""|".*?[^\\]")', tokens.String.Symbol), # sqlite names can be escaped with [square brackets]. 
left bracket # cannot be preceded by word character or a right bracket -- diff --git a/tests/test_split.py b/tests/test_split.py index a9d75765..e79750e8 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -18,8 +18,8 @@ def test_split_semicolon(): def test_split_backslash(): - stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';") - assert len(stmts) == 3 + stmts = sqlparse.parse("select '\'; select '\'';") + assert len(stmts) == 2 @pytest.mark.parametrize('fn', ['function.sql', From 64bb91f4880b46f73b4cc9207ae9ccc180d56d1b Mon Sep 17 00:00:00 2001 From: Kevin Stubbings Date: Wed, 22 Mar 2023 16:31:15 -0700 Subject: [PATCH 58/61] Testing branch --- test | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test diff --git a/test b/test new file mode 100644 index 00000000..e69de29b From d9d69f47ed13a583c81473211f44ae320470a58b Mon Sep 17 00:00:00 2001 From: Kevin Stubbings Date: Wed, 22 Mar 2023 16:36:19 -0700 Subject: [PATCH 59/61] Removed test file --- test | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test diff --git a/test b/test deleted file mode 100644 index e69de29b..00000000 From 58dae6fcd2a51209aeccd4fff3b923bf37714e19 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 18 Apr 2023 10:25:38 +0200 Subject: [PATCH 60/61] Bump version. --- sqlparse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py index f901185a..122595b3 100644 --- a/sqlparse/__init__.py +++ b/sqlparse/__init__.py @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.4.dev0' +__version__ = '0.4.4' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] From 647d1457acf7d88614215841eb15d423df2a1895 Mon Sep 17 00:00:00 2001 From: Andi Albrecht Date: Tue, 18 Apr 2023 10:29:29 +0200 Subject: [PATCH 61/61] Update Changelog. --- CHANGELOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 880a9ca9..a42577e1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,5 @@ -Development Version -------------------- +Release 0.4.4 (Apr 18, 2023) +---------------------------- Notable Changes
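
For reference, the configurable-syntax work above (PATCH 46 through 53) leaves
``Lexer.get_default_instance()``, ``clear()``, ``set_SQL_REGEX()``,
``add_keywords()`` and ``default_initialization()`` as the public hooks for
tokenizer customization. A minimal usage sketch based only on the docs and
tests added in those patches, assuming sqlparse at the 0.4.4 state of this
series; the ``BACON`` keyword is purely illustrative and not part of any
shipped dictionary:

.. code-block:: python

    import sqlparse
    from sqlparse import keywords, tokens
    from sqlparse.lexer import Lexer

    # Grab the singleton used internally by sqlparse.parse()/tokenize().
    lex = Lexer.get_default_instance()

    # Rebuild the configuration from scratch: clear it, reload the stock
    # regexes, then reload the stock keyword dictionaries in lookup order.
    lex.clear()
    lex.set_SQL_REGEX(keywords.SQL_REGEX)
    for kwdict in (keywords.KEYWORDS_COMMON, keywords.KEYWORDS_ORACLE,
                   keywords.KEYWORDS_PLPGSQL, keywords.KEYWORDS_HQL,
                   keywords.KEYWORDS_MSACCESS, keywords.KEYWORDS):
        lex.add_keywords(kwdict)

    # Register an extra keyword (note: a dict mapping name to token type).
    lex.add_keywords({'BACON': tokens.Keyword})

    print(sqlparse.parse('select * from foo BACON;')[0].tokens)

    # Restore the defaults so later parses are unaffected.
    lex.default_initialization()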