-
Notifications
You must be signed in to change notification settings - Fork 98
fix: improve client-side regex statement parser #1328
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,18 +21,18 @@ | |
Statement, | ||
) | ||
|
||
RE_BEGIN = re.compile(r"^\s*(BEGIN|START)(TRANSACTION)?", re.IGNORECASE) | ||
RE_COMMIT = re.compile(r"^\s*(COMMIT)(TRANSACTION)?", re.IGNORECASE) | ||
RE_ROLLBACK = re.compile(r"^\s*(ROLLBACK)(TRANSACTION)?", re.IGNORECASE) | ||
RE_BEGIN = re.compile(r"^\s*(BEGIN|START)(\s+TRANSACTION)?\s*$", re.IGNORECASE) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The lack of |
||
RE_COMMIT = re.compile(r"^\s*(COMMIT)(\s+TRANSACTION)?\s*$", re.IGNORECASE) | ||
RE_ROLLBACK = re.compile(r"^\s*(ROLLBACK)(\s+TRANSACTION)?\s*$", re.IGNORECASE) | ||
RE_SHOW_COMMIT_TIMESTAMP = re.compile( | ||
r"^\s*(SHOW)\s+(VARIABLE)\s+(COMMIT_TIMESTAMP)", re.IGNORECASE | ||
r"^\s*(SHOW)\s+(VARIABLE)\s+(COMMIT_TIMESTAMP)\s*$", re.IGNORECASE | ||
) | ||
RE_SHOW_READ_TIMESTAMP = re.compile( | ||
r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)", re.IGNORECASE | ||
r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)\s*$", re.IGNORECASE | ||
) | ||
RE_START_BATCH_DML = re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)", re.IGNORECASE) | ||
RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)", re.IGNORECASE) | ||
RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)", re.IGNORECASE) | ||
RE_START_BATCH_DML = re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)\s*$", re.IGNORECASE) | ||
RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)\s*$", re.IGNORECASE) | ||
RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)\s*$", re.IGNORECASE) | ||
RE_PARTITION_QUERY = re.compile(r"^\s*(PARTITION)\s+(.+)", re.IGNORECASE) | ||
RE_RUN_PARTITION = re.compile(r"^\s*(RUN)\s+(PARTITION)\s+(.+)", re.IGNORECASE) | ||
RE_RUN_PARTITIONED_QUERY = re.compile( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,11 +20,7 @@ | |
from google.cloud import spanner_v1 as spanner | ||
from google.cloud.spanner_dbapi import partition_helper | ||
from google.cloud.spanner_dbapi.batch_dml_executor import BatchMode, BatchDmlExecutor | ||
from google.cloud.spanner_dbapi.parse_utils import _get_statement_type | ||
from google.cloud.spanner_dbapi.parsed_statement import ( | ||
StatementType, | ||
AutocommitDmlMode, | ||
) | ||
from google.cloud.spanner_dbapi.parsed_statement import AutocommitDmlMode | ||
from google.cloud.spanner_dbapi.partition_helper import PartitionId | ||
from google.cloud.spanner_dbapi.parsed_statement import ParsedStatement, Statement | ||
from google.cloud.spanner_dbapi.transaction_helper import TransactionRetryHelper | ||
|
@@ -702,10 +698,6 @@ def set_autocommit_dml_mode( | |
self._autocommit_dml_mode = autocommit_dml_mode | ||
|
||
def _partitioned_query_validation(self, partitioned_query, statement): | ||
if _get_statement_type(Statement(partitioned_query)) is not StatementType.QUERY: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Skip validation here and let Spanner return an error if the statement is not valid for |
||
raise ProgrammingError( | ||
"Only queries can be partitioned. Invalid statement: " + statement.sql | ||
) | ||
if self.read_only is not True and self._client_transaction_started is True: | ||
raise ProgrammingError( | ||
"Partitioned query is not supported, because the connection is in a read/write transaction." | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -155,25 +155,30 @@ | |
STMT_INSERT = "INSERT" | ||
|
||
# Heuristic for identifying statements that don't need to be run as updates. | ||
# TODO: This and the other regexes do not match statements that start with a hint. | ||
RE_NON_UPDATE = re.compile(r"^\W*(SELECT|GRAPH|FROM)", re.IGNORECASE) | ||
|
||
RE_WITH = re.compile(r"^\s*(WITH)", re.IGNORECASE) | ||
|
||
# DDL statements follow | ||
# https://cloud.google.com/spanner/docs/data-definition-language | ||
RE_DDL = re.compile( | ||
r"^\s*(CREATE|ALTER|DROP|GRANT|REVOKE|RENAME)", re.IGNORECASE | re.DOTALL | ||
r"^\s*(CREATE|ALTER|DROP|GRANT|REVOKE|RENAME|ANALYZE)", re.IGNORECASE | re.DOTALL | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
) | ||
|
||
RE_IS_INSERT = re.compile(r"^\s*(INSERT)", re.IGNORECASE | re.DOTALL) | ||
# TODO: These do not match statements that start with a hint. | ||
RE_IS_INSERT = re.compile(r"^\s*(INSERT\s+)", re.IGNORECASE | re.DOTALL) | ||
RE_IS_UPDATE = re.compile(r"^\s*(UPDATE\s+)", re.IGNORECASE | re.DOTALL) | ||
RE_IS_DELETE = re.compile(r"^\s*(DELETE\s+)", re.IGNORECASE | re.DOTALL) | ||
|
||
RE_INSERT = re.compile( | ||
# Only match the `INSERT INTO <table_name> (columns...) | ||
# otherwise the rest of the statement could be a complex | ||
# operation. | ||
r"^\s*INSERT INTO (?P<table_name>[^\s\(\)]+)\s*\((?P<columns>[^\(\)]+)\)", | ||
r"^\s*INSERT(?:\s+INTO)?\s+(?P<table_name>[^\s\(\)]+)\s*\((?P<columns>[^\(\)]+)\)", | ||
re.IGNORECASE | re.DOTALL, | ||
) | ||
"""Deprecated: Use the RE_IS_INSERT, RE_IS_UPDATE, and RE_IS_DELETE regexes""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not a new deprecation, these regular expressions were already 'deprecated' in the sense that:
|
||
|
||
RE_VALUES_TILL_END = re.compile(r"VALUES\s*\(.+$", re.IGNORECASE | re.DOTALL) | ||
|
||
|
@@ -259,8 +264,13 @@ def _get_statement_type(statement): | |
# statements and doesn't yet support WITH for DML statements. | ||
return StatementType.QUERY | ||
|
||
statement.sql = ensure_where_clause(query) | ||
return StatementType.UPDATE | ||
if RE_IS_UPDATE.match(query) or RE_IS_DELETE.match(query): | ||
# TODO: Remove this? It makes more sense to have this in SQLAlchemy and | ||
# Django than here. | ||
statement.sql = ensure_where_clause(query) | ||
return StatementType.UPDATE | ||
|
||
return StatementType.UNKNOWN | ||
|
||
|
||
def sql_pyformat_args_to_spanner(sql, params): | ||
|
@@ -355,7 +365,7 @@ def get_param_types(params): | |
def ensure_where_clause(sql): | ||
""" | ||
Cloud Spanner requires a WHERE clause on UPDATE and DELETE statements. | ||
Add a dummy WHERE clause if non detected. | ||
Add a dummy WHERE clause if not detected. | ||
|
||
:type sql: str | ||
:param sql: SQL code to check. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
|
||
|
||
class StatementType(Enum): | ||
UNKNOWN = 0 | ||
CLIENT_SIDE = 1 | ||
DDL = 2 | ||
QUERY = 3 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This provides a fallback if the statement is not recognized by the client-side parser. Instead of failing directly, we allow it to be sent to Spanner and let the Spanner parser determine whether it is valid or not. This prevents potential new DML syntax in the future from being rejected here (e.g. if we were to support
merge
statements).