From 08b4c39fa3c8352f0ef6a3a79f174c1d6f2f0b6d Mon Sep 17 00:00:00 2001 From: Trey Spiller Date: Fri, 5 Sep 2025 15:05:08 -0500 Subject: [PATCH 1/2] Do not parse quoted built-in types into UDTs --- sqlglot/parser.py | 24 ++++++++++++++++-------- tests/dialects/test_postgres.py | 8 +++++++- tests/dialects/test_redshift.py | 9 ++++++++- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/sqlglot/parser.py b/sqlglot/parser.py index 6a629a542a..e7f729245d 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -5303,22 +5303,30 @@ def _parse_types( any_token=False, tokens=(TokenType.VAR,) ) if isinstance(identifier, exp.Identifier): - tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) + tokens = self.dialect.tokenize(identifier.name) if len(tokens) != 1: self.raise_error("Unexpected identifier", self._prev) if tokens[0].token_type in self.TYPE_TOKENS: - self._prev = tokens[0] - elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: - this = self._parse_user_defined_type(identifier) + type_token = tokens[0].token_type else: - self._retreat(self._index - 1) - return None + # retain quotes + tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) + + if len(tokens) != 1: + self.raise_error("Unexpected identifier", self._prev) + + if self.dialect.SUPPORTS_USER_DEFINED_TYPES: + type_token = None + this = self._parse_user_defined_type(identifier) + else: + self._retreat(self._index - 1) + return None else: return None - - type_token = self._prev.token_type + else: + type_token = self._prev.token_type if type_token == TokenType.PSEUDO_TYPE: return self.expression(exp.PseudoType, this=self._prev.text.upper()) diff --git a/tests/dialects/test_postgres.py b/tests/dialects/test_postgres.py index c458c69183..d1627d2a9d 100644 --- a/tests/dialects/test_postgres.py +++ b/tests/dialects/test_postgres.py @@ -1,4 +1,4 @@ -from sqlglot import ParseError, UnsupportedError, exp, transpile +from sqlglot import ParseError, UnsupportedError, exp, transpile, parse_one from sqlglot.helper import logger as helper_logger from tests.dialects.test_dialect import Validator @@ -805,6 +805,12 @@ def test_postgres(self): ) self.assertIsInstance(self.parse_one("id::UUID"), exp.Cast) + self.validate_identity('1::"int"', "CAST(1 AS INT)") + assert parse_one('1::"int"', read="postgres").to.is_type(exp.DataType.Type.INT) + + self.validate_identity('1::"udt"', 'CAST(1 AS "udt")') + assert parse_one('1::"udt"', read="postgres").to.this == exp.DataType.Type.USERDEFINED + self.validate_identity( "COPY tbl (col1, col2) FROM 'file' WITH (FORMAT format, HEADER MATCH, FREEZE TRUE)" ) diff --git a/tests/dialects/test_redshift.py b/tests/dialects/test_redshift.py index 2551bac38b..6ec30594ad 100644 --- a/tests/dialects/test_redshift.py +++ b/tests/dialects/test_redshift.py @@ -1,4 +1,4 @@ -from sqlglot import exp, parse_one, transpile +from sqlglot import exp, ParseError, parse_one, transpile from tests.dialects.test_dialect import Validator @@ -698,3 +698,10 @@ def test_analyze(self): self.validate_identity("ANALYZE VERBOSE TBL") self.validate_identity("ANALYZE TBL PREDICATE COLUMNS") self.validate_identity("ANALYZE TBL ALL COLUMNS") + + def test_cast(self): + self.validate_identity('1::"int"', "CAST(1 AS INTEGER)") + assert parse_one('1::"int"', read="redshift").to.is_type(exp.DataType.Type.INT) + + with self.assertRaises(ParseError): + parse_one('1::"udt"', read="redshift") From 7de5fb90e9fa505f18f801e3285ced608710c95a Mon Sep 17 00:00:00 2001 From: Trey Spiller Date: Fri, 5 Sep 2025 16:48:46 -0500 Subject: [PATCH 2/2] PR feedback --- sqlglot/parser.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/sqlglot/parser.py b/sqlglot/parser.py index e7f729245d..542a547be0 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -7,7 +7,7 @@ from collections import defaultdict from sqlglot import exp -from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors +from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors from sqlglot.helper import apply_index_offset, ensure_list, seq_get from sqlglot.time import format_time from sqlglot.tokens import Token, Tokenizer, TokenType @@ -5298,35 +5298,29 @@ def _parse_types( this: t.Optional[exp.Expression] = None prefix = self._match_text_seq("SYSUDTLIB", ".") - if not self._match_set(self.TYPE_TOKENS): + if self._match_set(self.TYPE_TOKENS): + type_token = self._prev.token_type + else: + type_token = None identifier = allow_identifiers and self._parse_id_var( any_token=False, tokens=(TokenType.VAR,) ) if isinstance(identifier, exp.Identifier): - tokens = self.dialect.tokenize(identifier.name) + try: + tokens = self.dialect.tokenize(identifier.name) + except TokenError: + tokens = None - if len(tokens) != 1: - self.raise_error("Unexpected identifier", self._prev) - - if tokens[0].token_type in self.TYPE_TOKENS: + if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: type_token = tokens[0].token_type else: - # retain quotes - tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) - - if len(tokens) != 1: - self.raise_error("Unexpected identifier", self._prev) - if self.dialect.SUPPORTS_USER_DEFINED_TYPES: - type_token = None this = self._parse_user_defined_type(identifier) else: self._retreat(self._index - 1) return None else: return None - else: - type_token = self._prev.token_type if type_token == TokenType.PSEUDO_TYPE: return self.expression(exp.PseudoType, this=self._prev.text.upper())