sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5import itertools 6from collections import defaultdict 7 8from sqlglot import exp 9from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 10from sqlglot.helper import apply_index_offset, ensure_list, seq_get 11from sqlglot.time import format_time 12from sqlglot.tokens import Token, Tokenizer, TokenType 13from sqlglot.trie import TrieResult, in_trie, new_trie 14 15if t.TYPE_CHECKING: 16 from sqlglot._typing import E, Lit 17 from sqlglot.dialects.dialect import Dialect, DialectType 18 19 T = t.TypeVar("T") 20 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 21 22logger = logging.getLogger("sqlglot") 23 24OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 25 26 27def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 28 if len(args) == 1 and args[0].is_star: 29 return exp.StarMap(this=args[0]) 30 31 keys = [] 32 values = [] 33 for i in range(0, len(args), 2): 34 keys.append(args[i]) 35 values.append(args[i + 1]) 36 37 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 38 39 40def build_like(args: t.List) -> exp.Escape | exp.Like: 41 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 42 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 43 44 45def binary_range_parser( 46 expr_type: t.Type[exp.Expression], reverse_args: bool = False 47) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 48 def _parse_binary_range( 49 self: Parser, this: t.Optional[exp.Expression] 50 ) -> t.Optional[exp.Expression]: 51 expression = self._parse_bitwise() 52 if reverse_args: 53 this, expression = expression, this 54 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 55 56 return _parse_binary_range 57 58 59def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 60 # Default argument order is base, expression 61 this = seq_get(args, 0) 62 expression = seq_get(args, 1) 63 64 if expression: 65 if not dialect.LOG_BASE_FIRST: 66 this, expression = expression, this 67 return exp.Log(this=this, expression=expression) 68 69 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 70 71 72def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 73 arg = seq_get(args, 0) 74 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 75 76 77def build_lower(args: t.List) -> exp.Lower | exp.Hex: 78 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 79 arg = seq_get(args, 0) 80 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 81 82 83def build_upper(args: t.List) -> exp.Upper | exp.Hex: 84 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 85 arg = seq_get(args, 0) 86 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 87 88 89def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 90 def _builder(args: t.List, dialect: Dialect) -> E: 91 expression = expr_type( 92 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 93 ) 94 if len(args) > 2 and expr_type is exp.JSONExtract: 95 expression.set("expressions", args[2:]) 96 97 return expression 98 99 return _builder 100 101 102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression) 111 112 113def build_pad(args: t.List, is_left: bool = True): 114 return exp.Pad( 115 this=seq_get(args, 0), 116 expression=seq_get(args, 1), 117 fill_pattern=seq_get(args, 2), 118 is_left=is_left, 119 ) 120 121 122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp 131 132 133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args) 143 144 145def build_trim(args: t.List, is_left: bool = True): 146 return exp.Trim( 147 this=seq_get(args, 0), 148 expression=seq_get(args, 1), 149 position="LEADING" if is_left else "TRAILING", 150 ) 151 152 153def build_coalesce( 154 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 155) -> exp.Coalesce: 156 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 157 158 159def build_locate_strposition(args: t.List): 160 return exp.StrPosition( 161 this=seq_get(args, 1), 162 substr=seq_get(args, 0), 163 position=seq_get(args, 2), 164 ) 165 166 167class _Parser(type): 168 def __new__(cls, clsname, bases, attrs): 169 klass = super().__new__(cls, clsname, bases, attrs) 170 171 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 172 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 173 174 return klass 175 176 177class Parser(metaclass=_Parser): 178 """ 179 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 180 181 Args: 182 error_level: The desired error level. 183 Default: ErrorLevel.IMMEDIATE 184 error_message_context: The amount of context to capture from a query string when displaying 185 the error message (in number of characters). 186 Default: 100 187 max_errors: Maximum number of error messages to include in a raised ParseError. 188 This is only relevant if error_level is ErrorLevel.RAISE. 189 Default: 3 190 """ 191 192 FUNCTIONS: t.Dict[str, t.Callable] = { 193 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 194 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 195 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 196 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 197 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 198 ), 199 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 200 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 201 ), 202 "CHAR": lambda args: exp.Chr(expressions=args), 203 "CHR": lambda args: exp.Chr(expressions=args), 204 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 205 "CONCAT": lambda args, dialect: exp.Concat( 206 expressions=args, 207 safe=not dialect.STRICT_STRING_CONCAT, 208 coalesce=dialect.CONCAT_COALESCE, 209 ), 210 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONVERT_TIMEZONE": build_convert_timezone, 216 "DATE_TO_DATE_STR": lambda args: exp.Cast( 217 this=seq_get(args, 0), 218 to=exp.DataType(this=exp.DataType.Type.TEXT), 219 ), 220 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 221 start=seq_get(args, 0), 222 end=seq_get(args, 1), 223 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 224 ), 225 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 226 "HEX": build_hex, 227 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 228 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 229 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 230 "LIKE": build_like, 231 "LOG": build_logarithm, 232 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 233 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 234 "LOWER": build_lower, 235 "LPAD": lambda args: build_pad(args), 236 "LEFTPAD": lambda args: build_pad(args), 237 "LTRIM": lambda args: build_trim(args), 238 "MOD": build_mod, 239 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 240 "RPAD": lambda args: build_pad(args, is_left=False), 241 "RTRIM": lambda args: build_trim(args, is_left=False), 242 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 243 if len(args) != 2 244 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 245 "STRPOS": exp.StrPosition.from_arg_list, 246 "CHARINDEX": lambda args: build_locate_strposition(args), 247 "INSTR": exp.StrPosition.from_arg_list, 248 "LOCATE": lambda args: build_locate_strposition(args), 249 "TIME_TO_TIME_STR": lambda args: exp.Cast( 250 this=seq_get(args, 0), 251 to=exp.DataType(this=exp.DataType.Type.TEXT), 252 ), 253 "TO_HEX": build_hex, 254 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 255 this=exp.Cast( 256 this=seq_get(args, 0), 257 to=exp.DataType(this=exp.DataType.Type.TEXT), 258 ), 259 start=exp.Literal.number(1), 260 length=exp.Literal.number(10), 261 ), 262 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 263 "UPPER": build_upper, 264 "VAR_MAP": build_var_map, 265 } 266 267 NO_PAREN_FUNCTIONS = { 268 TokenType.CURRENT_DATE: exp.CurrentDate, 269 TokenType.CURRENT_DATETIME: exp.CurrentDate, 270 TokenType.CURRENT_TIME: exp.CurrentTime, 271 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 272 TokenType.CURRENT_USER: exp.CurrentUser, 273 } 274 275 STRUCT_TYPE_TOKENS = { 276 TokenType.NESTED, 277 TokenType.OBJECT, 278 TokenType.STRUCT, 279 TokenType.UNION, 280 } 281 282 NESTED_TYPE_TOKENS = { 283 TokenType.ARRAY, 284 TokenType.LIST, 285 TokenType.LOWCARDINALITY, 286 TokenType.MAP, 287 TokenType.NULLABLE, 288 TokenType.RANGE, 289 *STRUCT_TYPE_TOKENS, 290 } 291 292 ENUM_TYPE_TOKENS = { 293 TokenType.DYNAMIC, 294 TokenType.ENUM, 295 TokenType.ENUM8, 296 TokenType.ENUM16, 297 } 298 299 AGGREGATE_TYPE_TOKENS = { 300 TokenType.AGGREGATEFUNCTION, 301 TokenType.SIMPLEAGGREGATEFUNCTION, 302 } 303 304 TYPE_TOKENS = { 305 TokenType.BIT, 306 TokenType.BOOLEAN, 307 TokenType.TINYINT, 308 TokenType.UTINYINT, 309 TokenType.SMALLINT, 310 TokenType.USMALLINT, 311 TokenType.INT, 312 TokenType.UINT, 313 TokenType.BIGINT, 314 TokenType.UBIGINT, 315 TokenType.INT128, 316 TokenType.UINT128, 317 TokenType.INT256, 318 TokenType.UINT256, 319 TokenType.MEDIUMINT, 320 TokenType.UMEDIUMINT, 321 TokenType.FIXEDSTRING, 322 TokenType.FLOAT, 323 TokenType.DOUBLE, 324 TokenType.UDOUBLE, 325 TokenType.CHAR, 326 TokenType.NCHAR, 327 TokenType.VARCHAR, 328 TokenType.NVARCHAR, 329 TokenType.BPCHAR, 330 TokenType.TEXT, 331 TokenType.MEDIUMTEXT, 332 TokenType.LONGTEXT, 333 TokenType.BLOB, 334 TokenType.MEDIUMBLOB, 335 TokenType.LONGBLOB, 336 TokenType.BINARY, 337 TokenType.VARBINARY, 338 TokenType.JSON, 339 TokenType.JSONB, 340 TokenType.INTERVAL, 341 TokenType.TINYBLOB, 342 TokenType.TINYTEXT, 343 TokenType.TIME, 344 TokenType.TIMETZ, 345 TokenType.TIMESTAMP, 346 TokenType.TIMESTAMP_S, 347 TokenType.TIMESTAMP_MS, 348 TokenType.TIMESTAMP_NS, 349 TokenType.TIMESTAMPTZ, 350 TokenType.TIMESTAMPLTZ, 351 TokenType.TIMESTAMPNTZ, 352 TokenType.DATETIME, 353 TokenType.DATETIME2, 354 TokenType.DATETIME64, 355 TokenType.SMALLDATETIME, 356 TokenType.DATE, 357 TokenType.DATE32, 358 TokenType.INT4RANGE, 359 TokenType.INT4MULTIRANGE, 360 TokenType.INT8RANGE, 361 TokenType.INT8MULTIRANGE, 362 TokenType.NUMRANGE, 363 TokenType.NUMMULTIRANGE, 364 TokenType.TSRANGE, 365 TokenType.TSMULTIRANGE, 366 TokenType.TSTZRANGE, 367 TokenType.TSTZMULTIRANGE, 368 TokenType.DATERANGE, 369 TokenType.DATEMULTIRANGE, 370 TokenType.DECIMAL, 371 TokenType.DECIMAL32, 372 TokenType.DECIMAL64, 373 TokenType.DECIMAL128, 374 TokenType.DECIMAL256, 375 TokenType.UDECIMAL, 376 TokenType.BIGDECIMAL, 377 TokenType.UUID, 378 TokenType.GEOGRAPHY, 379 TokenType.GEOMETRY, 380 TokenType.POINT, 381 TokenType.RING, 382 TokenType.LINESTRING, 383 TokenType.MULTILINESTRING, 384 TokenType.POLYGON, 385 TokenType.MULTIPOLYGON, 386 TokenType.HLLSKETCH, 387 TokenType.HSTORE, 388 TokenType.PSEUDO_TYPE, 389 TokenType.SUPER, 390 TokenType.SERIAL, 391 TokenType.SMALLSERIAL, 392 TokenType.BIGSERIAL, 393 TokenType.XML, 394 TokenType.YEAR, 395 TokenType.USERDEFINED, 396 TokenType.MONEY, 397 TokenType.SMALLMONEY, 398 TokenType.ROWVERSION, 399 TokenType.IMAGE, 400 TokenType.VARIANT, 401 TokenType.VECTOR, 402 TokenType.VOID, 403 TokenType.OBJECT, 404 TokenType.OBJECT_IDENTIFIER, 405 TokenType.INET, 406 TokenType.IPADDRESS, 407 TokenType.IPPREFIX, 408 TokenType.IPV4, 409 TokenType.IPV6, 410 TokenType.UNKNOWN, 411 TokenType.NOTHING, 412 TokenType.NULL, 413 TokenType.NAME, 414 TokenType.TDIGEST, 415 TokenType.DYNAMIC, 416 *ENUM_TYPE_TOKENS, 417 *NESTED_TYPE_TOKENS, 418 *AGGREGATE_TYPE_TOKENS, 419 } 420 421 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 422 TokenType.BIGINT: TokenType.UBIGINT, 423 TokenType.INT: TokenType.UINT, 424 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 425 TokenType.SMALLINT: TokenType.USMALLINT, 426 TokenType.TINYINT: TokenType.UTINYINT, 427 TokenType.DECIMAL: TokenType.UDECIMAL, 428 TokenType.DOUBLE: TokenType.UDOUBLE, 429 } 430 431 SUBQUERY_PREDICATES = { 432 TokenType.ANY: exp.Any, 433 TokenType.ALL: exp.All, 434 TokenType.EXISTS: exp.Exists, 435 TokenType.SOME: exp.Any, 436 } 437 438 RESERVED_TOKENS = { 439 *Tokenizer.SINGLE_TOKENS.values(), 440 TokenType.SELECT, 441 } - {TokenType.IDENTIFIER} 442 443 DB_CREATABLES = { 444 TokenType.DATABASE, 445 TokenType.DICTIONARY, 446 TokenType.FILE_FORMAT, 447 TokenType.MODEL, 448 TokenType.NAMESPACE, 449 TokenType.SCHEMA, 450 TokenType.SEQUENCE, 451 TokenType.SINK, 452 TokenType.SOURCE, 453 TokenType.STAGE, 454 TokenType.STORAGE_INTEGRATION, 455 TokenType.STREAMLIT, 456 TokenType.TABLE, 457 TokenType.TAG, 458 TokenType.VIEW, 459 TokenType.WAREHOUSE, 460 } 461 462 CREATABLES = { 463 TokenType.COLUMN, 464 TokenType.CONSTRAINT, 465 TokenType.FOREIGN_KEY, 466 TokenType.FUNCTION, 467 TokenType.INDEX, 468 TokenType.PROCEDURE, 469 *DB_CREATABLES, 470 } 471 472 ALTERABLES = { 473 TokenType.INDEX, 474 TokenType.TABLE, 475 TokenType.VIEW, 476 } 477 478 # Tokens that can represent identifiers 479 ID_VAR_TOKENS = { 480 TokenType.ALL, 481 TokenType.ATTACH, 482 TokenType.VAR, 483 TokenType.ANTI, 484 TokenType.APPLY, 485 TokenType.ASC, 486 TokenType.ASOF, 487 TokenType.AUTO_INCREMENT, 488 TokenType.BEGIN, 489 TokenType.BPCHAR, 490 TokenType.CACHE, 491 TokenType.CASE, 492 TokenType.COLLATE, 493 TokenType.COMMAND, 494 TokenType.COMMENT, 495 TokenType.COMMIT, 496 TokenType.CONSTRAINT, 497 TokenType.COPY, 498 TokenType.CUBE, 499 TokenType.CURRENT_SCHEMA, 500 TokenType.DEFAULT, 501 TokenType.DELETE, 502 TokenType.DESC, 503 TokenType.DESCRIBE, 504 TokenType.DETACH, 505 TokenType.DICTIONARY, 506 TokenType.DIV, 507 TokenType.END, 508 TokenType.EXECUTE, 509 TokenType.EXPORT, 510 TokenType.ESCAPE, 511 TokenType.FALSE, 512 TokenType.FIRST, 513 TokenType.FILTER, 514 TokenType.FINAL, 515 TokenType.FORMAT, 516 TokenType.FULL, 517 TokenType.GET, 518 TokenType.IDENTIFIER, 519 TokenType.IS, 520 TokenType.ISNULL, 521 TokenType.INTERVAL, 522 TokenType.KEEP, 523 TokenType.KILL, 524 TokenType.LEFT, 525 TokenType.LIMIT, 526 TokenType.LOAD, 527 TokenType.MERGE, 528 TokenType.NATURAL, 529 TokenType.NEXT, 530 TokenType.OFFSET, 531 TokenType.OPERATOR, 532 TokenType.ORDINALITY, 533 TokenType.OVERLAPS, 534 TokenType.OVERWRITE, 535 TokenType.PARTITION, 536 TokenType.PERCENT, 537 TokenType.PIVOT, 538 TokenType.PRAGMA, 539 TokenType.PUT, 540 TokenType.RANGE, 541 TokenType.RECURSIVE, 542 TokenType.REFERENCES, 543 TokenType.REFRESH, 544 TokenType.RENAME, 545 TokenType.REPLACE, 546 TokenType.RIGHT, 547 TokenType.ROLLUP, 548 TokenType.ROW, 549 TokenType.ROWS, 550 TokenType.SEMI, 551 TokenType.SET, 552 TokenType.SETTINGS, 553 TokenType.SHOW, 554 TokenType.TEMPORARY, 555 TokenType.TOP, 556 TokenType.TRUE, 557 TokenType.TRUNCATE, 558 TokenType.UNIQUE, 559 TokenType.UNNEST, 560 TokenType.UNPIVOT, 561 TokenType.UPDATE, 562 TokenType.USE, 563 TokenType.VOLATILE, 564 TokenType.WINDOW, 565 *CREATABLES, 566 *SUBQUERY_PREDICATES, 567 *TYPE_TOKENS, 568 *NO_PAREN_FUNCTIONS, 569 } 570 ID_VAR_TOKENS.remove(TokenType.UNION) 571 572 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 573 TokenType.ANTI, 574 TokenType.APPLY, 575 TokenType.ASOF, 576 TokenType.FULL, 577 TokenType.LEFT, 578 TokenType.LOCK, 579 TokenType.NATURAL, 580 TokenType.RIGHT, 581 TokenType.SEMI, 582 TokenType.WINDOW, 583 } 584 585 ALIAS_TOKENS = ID_VAR_TOKENS 586 587 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 588 589 ARRAY_CONSTRUCTORS = { 590 "ARRAY": exp.Array, 591 "LIST": exp.List, 592 } 593 594 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 595 596 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 597 598 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 599 600 FUNC_TOKENS = { 601 TokenType.COLLATE, 602 TokenType.COMMAND, 603 TokenType.CURRENT_DATE, 604 TokenType.CURRENT_DATETIME, 605 TokenType.CURRENT_SCHEMA, 606 TokenType.CURRENT_TIMESTAMP, 607 TokenType.CURRENT_TIME, 608 TokenType.CURRENT_USER, 609 TokenType.FILTER, 610 TokenType.FIRST, 611 TokenType.FORMAT, 612 TokenType.GET, 613 TokenType.GLOB, 614 TokenType.IDENTIFIER, 615 TokenType.INDEX, 616 TokenType.ISNULL, 617 TokenType.ILIKE, 618 TokenType.INSERT, 619 TokenType.LIKE, 620 TokenType.MERGE, 621 TokenType.NEXT, 622 TokenType.OFFSET, 623 TokenType.PRIMARY_KEY, 624 TokenType.RANGE, 625 TokenType.REPLACE, 626 TokenType.RLIKE, 627 TokenType.ROW, 628 TokenType.UNNEST, 629 TokenType.VAR, 630 TokenType.LEFT, 631 TokenType.RIGHT, 632 TokenType.SEQUENCE, 633 TokenType.DATE, 634 TokenType.DATETIME, 635 TokenType.TABLE, 636 TokenType.TIMESTAMP, 637 TokenType.TIMESTAMPTZ, 638 TokenType.TRUNCATE, 639 TokenType.WINDOW, 640 TokenType.XOR, 641 *TYPE_TOKENS, 642 *SUBQUERY_PREDICATES, 643 } 644 645 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 646 TokenType.AND: exp.And, 647 } 648 649 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 650 TokenType.COLON_EQ: exp.PropertyEQ, 651 } 652 653 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 654 TokenType.OR: exp.Or, 655 } 656 657 EQUALITY = { 658 TokenType.EQ: exp.EQ, 659 TokenType.NEQ: exp.NEQ, 660 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 661 } 662 663 COMPARISON = { 664 TokenType.GT: exp.GT, 665 TokenType.GTE: exp.GTE, 666 TokenType.LT: exp.LT, 667 TokenType.LTE: exp.LTE, 668 } 669 670 BITWISE = { 671 TokenType.AMP: exp.BitwiseAnd, 672 TokenType.CARET: exp.BitwiseXor, 673 TokenType.PIPE: exp.BitwiseOr, 674 } 675 676 TERM = { 677 TokenType.DASH: exp.Sub, 678 TokenType.PLUS: exp.Add, 679 TokenType.MOD: exp.Mod, 680 TokenType.COLLATE: exp.Collate, 681 } 682 683 FACTOR = { 684 TokenType.DIV: exp.IntDiv, 685 TokenType.LR_ARROW: exp.Distance, 686 TokenType.SLASH: exp.Div, 687 TokenType.STAR: exp.Mul, 688 } 689 690 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 691 692 TIMES = { 693 TokenType.TIME, 694 TokenType.TIMETZ, 695 } 696 697 TIMESTAMPS = { 698 TokenType.TIMESTAMP, 699 TokenType.TIMESTAMPNTZ, 700 TokenType.TIMESTAMPTZ, 701 TokenType.TIMESTAMPLTZ, 702 *TIMES, 703 } 704 705 SET_OPERATIONS = { 706 TokenType.UNION, 707 TokenType.INTERSECT, 708 TokenType.EXCEPT, 709 } 710 711 JOIN_METHODS = { 712 TokenType.ASOF, 713 TokenType.NATURAL, 714 TokenType.POSITIONAL, 715 } 716 717 JOIN_SIDES = { 718 TokenType.LEFT, 719 TokenType.RIGHT, 720 TokenType.FULL, 721 } 722 723 JOIN_KINDS = { 724 TokenType.ANTI, 725 TokenType.CROSS, 726 TokenType.INNER, 727 TokenType.OUTER, 728 TokenType.SEMI, 729 TokenType.STRAIGHT_JOIN, 730 } 731 732 JOIN_HINTS: t.Set[str] = set() 733 734 LAMBDAS = { 735 TokenType.ARROW: lambda self, expressions: self.expression( 736 exp.Lambda, 737 this=self._replace_lambda( 738 self._parse_assignment(), 739 expressions, 740 ), 741 expressions=expressions, 742 ), 743 TokenType.FARROW: lambda self, expressions: self.expression( 744 exp.Kwarg, 745 this=exp.var(expressions[0].name), 746 expression=self._parse_assignment(), 747 ), 748 } 749 750 COLUMN_OPERATORS = { 751 TokenType.DOT: None, 752 TokenType.DOTCOLON: lambda self, this, to: self.expression( 753 exp.JSONCast, 754 this=this, 755 to=to, 756 ), 757 TokenType.DCOLON: lambda self, this, to: self.expression( 758 exp.Cast if self.STRICT_CAST else exp.TryCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.ARROW: lambda self, this, path: self.expression( 763 exp.JSONExtract, 764 this=this, 765 expression=self.dialect.to_json_path(path), 766 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 767 ), 768 TokenType.DARROW: lambda self, this, path: self.expression( 769 exp.JSONExtractScalar, 770 this=this, 771 expression=self.dialect.to_json_path(path), 772 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 773 ), 774 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 775 exp.JSONBExtract, 776 this=this, 777 expression=path, 778 ), 779 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtractScalar, 781 this=this, 782 expression=path, 783 ), 784 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 785 exp.JSONBContains, 786 this=this, 787 expression=key, 788 ), 789 } 790 791 EXPRESSION_PARSERS = { 792 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 793 exp.Column: lambda self: self._parse_column(), 794 exp.Condition: lambda self: self._parse_assignment(), 795 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 796 exp.Expression: lambda self: self._parse_expression(), 797 exp.From: lambda self: self._parse_from(joins=True), 798 exp.Group: lambda self: self._parse_group(), 799 exp.Having: lambda self: self._parse_having(), 800 exp.Hint: lambda self: self._parse_hint_body(), 801 exp.Identifier: lambda self: self._parse_id_var(), 802 exp.Join: lambda self: self._parse_join(), 803 exp.Lambda: lambda self: self._parse_lambda(), 804 exp.Lateral: lambda self: self._parse_lateral(), 805 exp.Limit: lambda self: self._parse_limit(), 806 exp.Offset: lambda self: self._parse_offset(), 807 exp.Order: lambda self: self._parse_order(), 808 exp.Ordered: lambda self: self._parse_ordered(), 809 exp.Properties: lambda self: self._parse_properties(), 810 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 811 exp.Qualify: lambda self: self._parse_qualify(), 812 exp.Returning: lambda self: self._parse_returning(), 813 exp.Select: lambda self: self._parse_select(), 814 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 815 exp.Table: lambda self: self._parse_table_parts(), 816 exp.TableAlias: lambda self: self._parse_table_alias(), 817 exp.Tuple: lambda self: self._parse_value(values=False), 818 exp.Whens: lambda self: self._parse_when_matched(), 819 exp.Where: lambda self: self._parse_where(), 820 exp.Window: lambda self: self._parse_named_window(), 821 exp.With: lambda self: self._parse_with(), 822 "JOIN_TYPE": lambda self: self._parse_join_parts(), 823 } 824 825 STATEMENT_PARSERS = { 826 TokenType.ALTER: lambda self: self._parse_alter(), 827 TokenType.ANALYZE: lambda self: self._parse_analyze(), 828 TokenType.BEGIN: lambda self: self._parse_transaction(), 829 TokenType.CACHE: lambda self: self._parse_cache(), 830 TokenType.COMMENT: lambda self: self._parse_comment(), 831 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 832 TokenType.COPY: lambda self: self._parse_copy(), 833 TokenType.CREATE: lambda self: self._parse_create(), 834 TokenType.DELETE: lambda self: self._parse_delete(), 835 TokenType.DESC: lambda self: self._parse_describe(), 836 TokenType.DESCRIBE: lambda self: self._parse_describe(), 837 TokenType.DROP: lambda self: self._parse_drop(), 838 TokenType.GRANT: lambda self: self._parse_grant(), 839 TokenType.INSERT: lambda self: self._parse_insert(), 840 TokenType.KILL: lambda self: self._parse_kill(), 841 TokenType.LOAD: lambda self: self._parse_load(), 842 TokenType.MERGE: lambda self: self._parse_merge(), 843 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 844 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 845 TokenType.REFRESH: lambda self: self._parse_refresh(), 846 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 847 TokenType.SET: lambda self: self._parse_set(), 848 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 849 TokenType.UNCACHE: lambda self: self._parse_uncache(), 850 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 851 TokenType.UPDATE: lambda self: self._parse_update(), 852 TokenType.USE: lambda self: self._parse_use(), 853 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 854 } 855 856 UNARY_PARSERS = { 857 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 858 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 859 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 860 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 861 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 862 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 863 } 864 865 STRING_PARSERS = { 866 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 867 exp.RawString, this=token.text 868 ), 869 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 870 exp.National, this=token.text 871 ), 872 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 873 TokenType.STRING: lambda self, token: self.expression( 874 exp.Literal, this=token.text, is_string=True 875 ), 876 TokenType.UNICODE_STRING: lambda self, token: self.expression( 877 exp.UnicodeString, 878 this=token.text, 879 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 880 ), 881 } 882 883 NUMERIC_PARSERS = { 884 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 885 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 886 TokenType.HEX_STRING: lambda self, token: self.expression( 887 exp.HexString, 888 this=token.text, 889 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 890 ), 891 TokenType.NUMBER: lambda self, token: self.expression( 892 exp.Literal, this=token.text, is_string=False 893 ), 894 } 895 896 PRIMARY_PARSERS = { 897 **STRING_PARSERS, 898 **NUMERIC_PARSERS, 899 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 900 TokenType.NULL: lambda self, _: self.expression(exp.Null), 901 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 902 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 903 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 904 TokenType.STAR: lambda self, _: self._parse_star_ops(), 905 } 906 907 PLACEHOLDER_PARSERS = { 908 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 909 TokenType.PARAMETER: lambda self: self._parse_parameter(), 910 TokenType.COLON: lambda self: ( 911 self.expression(exp.Placeholder, this=self._prev.text) 912 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 913 else None 914 ), 915 } 916 917 RANGE_PARSERS = { 918 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 919 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 920 TokenType.GLOB: binary_range_parser(exp.Glob), 921 TokenType.ILIKE: binary_range_parser(exp.ILike), 922 TokenType.IN: lambda self, this: self._parse_in(this), 923 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 924 TokenType.IS: lambda self, this: self._parse_is(this), 925 TokenType.LIKE: binary_range_parser(exp.Like), 926 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 927 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 928 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 929 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 930 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 931 } 932 933 PIPE_SYNTAX_TRANSFORM_PARSERS = { 934 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 935 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 936 "ORDER BY": lambda self, query: query.order_by( 937 self._parse_order(), append=False, copy=False 938 ), 939 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 940 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 941 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 942 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 943 } 944 945 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 946 "ALLOWED_VALUES": lambda self: self.expression( 947 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 948 ), 949 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 950 "AUTO": lambda self: self._parse_auto_property(), 951 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 952 "BACKUP": lambda self: self.expression( 953 exp.BackupProperty, this=self._parse_var(any_token=True) 954 ), 955 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 956 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 957 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 958 "CHECKSUM": lambda self: self._parse_checksum(), 959 "CLUSTER BY": lambda self: self._parse_cluster(), 960 "CLUSTERED": lambda self: self._parse_clustered_by(), 961 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 962 exp.CollateProperty, **kwargs 963 ), 964 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 965 "CONTAINS": lambda self: self._parse_contains_property(), 966 "COPY": lambda self: self._parse_copy_property(), 967 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 968 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 969 "DEFINER": lambda self: self._parse_definer(), 970 "DETERMINISTIC": lambda self: self.expression( 971 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 972 ), 973 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 974 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 975 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 976 "DISTKEY": lambda self: self._parse_distkey(), 977 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 978 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 979 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 980 "ENVIRONMENT": lambda self: self.expression( 981 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 982 ), 983 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 984 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 985 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 986 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 987 "FREESPACE": lambda self: self._parse_freespace(), 988 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 989 "HEAP": lambda self: self.expression(exp.HeapProperty), 990 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 991 "IMMUTABLE": lambda self: self.expression( 992 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 993 ), 994 "INHERITS": lambda self: self.expression( 995 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 996 ), 997 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 998 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 999 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1000 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1001 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1002 "LIKE": lambda self: self._parse_create_like(), 1003 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1004 "LOCK": lambda self: self._parse_locking(), 1005 "LOCKING": lambda self: self._parse_locking(), 1006 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1007 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1008 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1009 "MODIFIES": lambda self: self._parse_modifies_property(), 1010 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1011 "NO": lambda self: self._parse_no_property(), 1012 "ON": lambda self: self._parse_on_property(), 1013 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1014 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1015 "PARTITION": lambda self: self._parse_partitioned_of(), 1016 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1017 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1018 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1019 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1020 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1021 "READS": lambda self: self._parse_reads_property(), 1022 "REMOTE": lambda self: self._parse_remote_with_connection(), 1023 "RETURNS": lambda self: self._parse_returns(), 1024 "STRICT": lambda self: self.expression(exp.StrictProperty), 1025 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1026 "ROW": lambda self: self._parse_row(), 1027 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1028 "SAMPLE": lambda self: self.expression( 1029 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1030 ), 1031 "SECURE": lambda self: self.expression(exp.SecureProperty), 1032 "SECURITY": lambda self: self._parse_security(), 1033 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1034 "SETTINGS": lambda self: self._parse_settings_property(), 1035 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1036 "SORTKEY": lambda self: self._parse_sortkey(), 1037 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1038 "STABLE": lambda self: self.expression( 1039 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1040 ), 1041 "STORED": lambda self: self._parse_stored(), 1042 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1043 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1044 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1045 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1046 "TO": lambda self: self._parse_to_table(), 1047 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1048 "TRANSFORM": lambda self: self.expression( 1049 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1050 ), 1051 "TTL": lambda self: self._parse_ttl(), 1052 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1053 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1054 "VOLATILE": lambda self: self._parse_volatile_property(), 1055 "WITH": lambda self: self._parse_with_property(), 1056 } 1057 1058 CONSTRAINT_PARSERS = { 1059 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1060 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1061 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1062 "CHARACTER SET": lambda self: self.expression( 1063 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1064 ), 1065 "CHECK": lambda self: self.expression( 1066 exp.CheckColumnConstraint, 1067 this=self._parse_wrapped(self._parse_assignment), 1068 enforced=self._match_text_seq("ENFORCED"), 1069 ), 1070 "COLLATE": lambda self: self.expression( 1071 exp.CollateColumnConstraint, 1072 this=self._parse_identifier() or self._parse_column(), 1073 ), 1074 "COMMENT": lambda self: self.expression( 1075 exp.CommentColumnConstraint, this=self._parse_string() 1076 ), 1077 "COMPRESS": lambda self: self._parse_compress(), 1078 "CLUSTERED": lambda self: self.expression( 1079 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1080 ), 1081 "NONCLUSTERED": lambda self: self.expression( 1082 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1083 ), 1084 "DEFAULT": lambda self: self.expression( 1085 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1086 ), 1087 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1088 "EPHEMERAL": lambda self: self.expression( 1089 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1090 ), 1091 "EXCLUDE": lambda self: self.expression( 1092 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1093 ), 1094 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1095 "FORMAT": lambda self: self.expression( 1096 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1097 ), 1098 "GENERATED": lambda self: self._parse_generated_as_identity(), 1099 "IDENTITY": lambda self: self._parse_auto_increment(), 1100 "INLINE": lambda self: self._parse_inline(), 1101 "LIKE": lambda self: self._parse_create_like(), 1102 "NOT": lambda self: self._parse_not_constraint(), 1103 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1104 "ON": lambda self: ( 1105 self._match(TokenType.UPDATE) 1106 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1107 ) 1108 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1109 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1110 "PERIOD": lambda self: self._parse_period_for_system_time(), 1111 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1112 "REFERENCES": lambda self: self._parse_references(match=False), 1113 "TITLE": lambda self: self.expression( 1114 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1115 ), 1116 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1117 "UNIQUE": lambda self: self._parse_unique(), 1118 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1119 "WATERMARK": lambda self: self.expression( 1120 exp.WatermarkColumnConstraint, 1121 this=self._match(TokenType.FOR) and self._parse_column(), 1122 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1123 ), 1124 "WITH": lambda self: self.expression( 1125 exp.Properties, expressions=self._parse_wrapped_properties() 1126 ), 1127 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1128 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1129 } 1130 1131 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1132 klass = ( 1133 exp.PartitionedByBucket 1134 if self._prev.text.upper() == "BUCKET" 1135 else exp.PartitionByTruncate 1136 ) 1137 1138 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1139 this, expression = seq_get(args, 0), seq_get(args, 1) 1140 1141 if isinstance(this, exp.Literal): 1142 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1143 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1144 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1145 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1146 # 1147 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1148 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1149 this, expression = expression, this 1150 1151 return self.expression(klass, this=this, expression=expression) 1152 1153 ALTER_PARSERS = { 1154 "ADD": lambda self: self._parse_alter_table_add(), 1155 "AS": lambda self: self._parse_select(), 1156 "ALTER": lambda self: self._parse_alter_table_alter(), 1157 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1158 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1159 "DROP": lambda self: self._parse_alter_table_drop(), 1160 "RENAME": lambda self: self._parse_alter_table_rename(), 1161 "SET": lambda self: self._parse_alter_table_set(), 1162 "SWAP": lambda self: self.expression( 1163 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1164 ), 1165 } 1166 1167 ALTER_ALTER_PARSERS = { 1168 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1169 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1170 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1171 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1172 } 1173 1174 SCHEMA_UNNAMED_CONSTRAINTS = { 1175 "CHECK", 1176 "EXCLUDE", 1177 "FOREIGN KEY", 1178 "LIKE", 1179 "PERIOD", 1180 "PRIMARY KEY", 1181 "UNIQUE", 1182 "WATERMARK", 1183 "BUCKET", 1184 "TRUNCATE", 1185 } 1186 1187 NO_PAREN_FUNCTION_PARSERS = { 1188 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1189 "CASE": lambda self: self._parse_case(), 1190 "CONNECT_BY_ROOT": lambda self: self.expression( 1191 exp.ConnectByRoot, this=self._parse_column() 1192 ), 1193 "IF": lambda self: self._parse_if(), 1194 } 1195 1196 INVALID_FUNC_NAME_TOKENS = { 1197 TokenType.IDENTIFIER, 1198 TokenType.STRING, 1199 } 1200 1201 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1202 1203 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1204 1205 FUNCTION_PARSERS = { 1206 **{ 1207 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1208 }, 1209 **{ 1210 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1211 }, 1212 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1213 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1214 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1215 "DECODE": lambda self: self._parse_decode(), 1216 "EXTRACT": lambda self: self._parse_extract(), 1217 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1218 "GAP_FILL": lambda self: self._parse_gap_fill(), 1219 "JSON_OBJECT": lambda self: self._parse_json_object(), 1220 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1221 "JSON_TABLE": lambda self: self._parse_json_table(), 1222 "MATCH": lambda self: self._parse_match_against(), 1223 "NORMALIZE": lambda self: self._parse_normalize(), 1224 "OPENJSON": lambda self: self._parse_open_json(), 1225 "OVERLAY": lambda self: self._parse_overlay(), 1226 "POSITION": lambda self: self._parse_position(), 1227 "PREDICT": lambda self: self._parse_predict(), 1228 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1229 "STRING_AGG": lambda self: self._parse_string_agg(), 1230 "SUBSTRING": lambda self: self._parse_substring(), 1231 "TRIM": lambda self: self._parse_trim(), 1232 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1233 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1234 "XMLELEMENT": lambda self: self.expression( 1235 exp.XMLElement, 1236 this=self._match_text_seq("NAME") and self._parse_id_var(), 1237 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1238 ), 1239 "XMLTABLE": lambda self: self._parse_xml_table(), 1240 } 1241 1242 QUERY_MODIFIER_PARSERS = { 1243 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1244 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1245 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1246 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1247 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1248 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1249 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1250 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1251 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1252 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1253 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1254 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1255 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1256 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1257 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1258 TokenType.CLUSTER_BY: lambda self: ( 1259 "cluster", 1260 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1261 ), 1262 TokenType.DISTRIBUTE_BY: lambda self: ( 1263 "distribute", 1264 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1265 ), 1266 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1267 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1268 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1269 } 1270 1271 SET_PARSERS = { 1272 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1273 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1274 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1275 "TRANSACTION": lambda self: self._parse_set_transaction(), 1276 } 1277 1278 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1279 1280 TYPE_LITERAL_PARSERS = { 1281 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1282 } 1283 1284 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1285 1286 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1287 1288 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1289 1290 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1291 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1292 "ISOLATION": ( 1293 ("LEVEL", "REPEATABLE", "READ"), 1294 ("LEVEL", "READ", "COMMITTED"), 1295 ("LEVEL", "READ", "UNCOMITTED"), 1296 ("LEVEL", "SERIALIZABLE"), 1297 ), 1298 "READ": ("WRITE", "ONLY"), 1299 } 1300 1301 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1302 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1303 ) 1304 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1305 1306 CREATE_SEQUENCE: OPTIONS_TYPE = { 1307 "SCALE": ("EXTEND", "NOEXTEND"), 1308 "SHARD": ("EXTEND", "NOEXTEND"), 1309 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1310 **dict.fromkeys( 1311 ( 1312 "SESSION", 1313 "GLOBAL", 1314 "KEEP", 1315 "NOKEEP", 1316 "ORDER", 1317 "NOORDER", 1318 "NOCACHE", 1319 "CYCLE", 1320 "NOCYCLE", 1321 "NOMINVALUE", 1322 "NOMAXVALUE", 1323 "NOSCALE", 1324 "NOSHARD", 1325 ), 1326 tuple(), 1327 ), 1328 } 1329 1330 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1331 1332 USABLES: OPTIONS_TYPE = dict.fromkeys( 1333 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1334 ) 1335 1336 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1337 1338 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1339 "TYPE": ("EVOLUTION",), 1340 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1341 } 1342 1343 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1344 1345 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1346 1347 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1348 "NOT": ("ENFORCED",), 1349 "MATCH": ( 1350 "FULL", 1351 "PARTIAL", 1352 "SIMPLE", 1353 ), 1354 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1355 "USING": ( 1356 "BTREE", 1357 "HASH", 1358 ), 1359 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1360 } 1361 1362 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1363 "NO": ("OTHERS",), 1364 "CURRENT": ("ROW",), 1365 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1366 } 1367 1368 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1369 1370 CLONE_KEYWORDS = {"CLONE", "COPY"} 1371 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1372 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1373 1374 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1375 1376 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1377 1378 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1379 1380 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1381 1382 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1383 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1384 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1385 1386 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1387 1388 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1389 1390 ADD_CONSTRAINT_TOKENS = { 1391 TokenType.CONSTRAINT, 1392 TokenType.FOREIGN_KEY, 1393 TokenType.INDEX, 1394 TokenType.KEY, 1395 TokenType.PRIMARY_KEY, 1396 TokenType.UNIQUE, 1397 } 1398 1399 DISTINCT_TOKENS = {TokenType.DISTINCT} 1400 1401 NULL_TOKENS = {TokenType.NULL} 1402 1403 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1404 1405 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1406 1407 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1408 1409 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1410 1411 ODBC_DATETIME_LITERALS = { 1412 "d": exp.Date, 1413 "t": exp.Time, 1414 "ts": exp.Timestamp, 1415 } 1416 1417 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1418 1419 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1420 1421 # The style options for the DESCRIBE statement 1422 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1423 1424 # The style options for the ANALYZE statement 1425 ANALYZE_STYLES = { 1426 "BUFFER_USAGE_LIMIT", 1427 "FULL", 1428 "LOCAL", 1429 "NO_WRITE_TO_BINLOG", 1430 "SAMPLE", 1431 "SKIP_LOCKED", 1432 "VERBOSE", 1433 } 1434 1435 ANALYZE_EXPRESSION_PARSERS = { 1436 "ALL": lambda self: self._parse_analyze_columns(), 1437 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1438 "DELETE": lambda self: self._parse_analyze_delete(), 1439 "DROP": lambda self: self._parse_analyze_histogram(), 1440 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1441 "LIST": lambda self: self._parse_analyze_list(), 1442 "PREDICATE": lambda self: self._parse_analyze_columns(), 1443 "UPDATE": lambda self: self._parse_analyze_histogram(), 1444 "VALIDATE": lambda self: self._parse_analyze_validate(), 1445 } 1446 1447 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1448 1449 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1450 1451 OPERATION_MODIFIERS: t.Set[str] = set() 1452 1453 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1454 1455 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1456 1457 STRICT_CAST = True 1458 1459 PREFIXED_PIVOT_COLUMNS = False 1460 IDENTIFY_PIVOT_STRINGS = False 1461 1462 LOG_DEFAULTS_TO_LN = False 1463 1464 # Whether the table sample clause expects CSV syntax 1465 TABLESAMPLE_CSV = False 1466 1467 # The default method used for table sampling 1468 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1469 1470 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1471 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1472 1473 # Whether the TRIM function expects the characters to trim as its first argument 1474 TRIM_PATTERN_FIRST = False 1475 1476 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1477 STRING_ALIASES = False 1478 1479 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1480 MODIFIERS_ATTACHED_TO_SET_OP = True 1481 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1482 1483 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1484 NO_PAREN_IF_COMMANDS = True 1485 1486 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1487 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1488 1489 # Whether the `:` operator is used to extract a value from a VARIANT column 1490 COLON_IS_VARIANT_EXTRACT = False 1491 1492 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1493 # If this is True and '(' is not found, the keyword will be treated as an identifier 1494 VALUES_FOLLOWED_BY_PAREN = True 1495 1496 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1497 SUPPORTS_IMPLICIT_UNNEST = False 1498 1499 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1500 INTERVAL_SPANS = True 1501 1502 # Whether a PARTITION clause can follow a table reference 1503 SUPPORTS_PARTITION_SELECTION = False 1504 1505 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1506 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1507 1508 # Whether the 'AS' keyword is optional in the CTE definition syntax 1509 OPTIONAL_ALIAS_TOKEN_CTE = True 1510 1511 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1512 ALTER_RENAME_REQUIRES_COLUMN = True 1513 1514 __slots__ = ( 1515 "error_level", 1516 "error_message_context", 1517 "max_errors", 1518 "dialect", 1519 "sql", 1520 "errors", 1521 "_tokens", 1522 "_index", 1523 "_curr", 1524 "_next", 1525 "_prev", 1526 "_prev_comments", 1527 "_pipe_cte_counter", 1528 ) 1529 1530 # Autofilled 1531 SHOW_TRIE: t.Dict = {} 1532 SET_TRIE: t.Dict = {} 1533 1534 def __init__( 1535 self, 1536 error_level: t.Optional[ErrorLevel] = None, 1537 error_message_context: int = 100, 1538 max_errors: int = 3, 1539 dialect: DialectType = None, 1540 ): 1541 from sqlglot.dialects import Dialect 1542 1543 self.error_level = error_level or ErrorLevel.IMMEDIATE 1544 self.error_message_context = error_message_context 1545 self.max_errors = max_errors 1546 self.dialect = Dialect.get_or_raise(dialect) 1547 self.reset() 1548 1549 def reset(self): 1550 self.sql = "" 1551 self.errors = [] 1552 self._tokens = [] 1553 self._index = 0 1554 self._curr = None 1555 self._next = None 1556 self._prev = None 1557 self._prev_comments = None 1558 self._pipe_cte_counter = 0 1559 1560 def parse( 1561 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1562 ) -> t.List[t.Optional[exp.Expression]]: 1563 """ 1564 Parses a list of tokens and returns a list of syntax trees, one tree 1565 per parsed SQL statement. 1566 1567 Args: 1568 raw_tokens: The list of tokens. 1569 sql: The original SQL string, used to produce helpful debug messages. 1570 1571 Returns: 1572 The list of the produced syntax trees. 1573 """ 1574 return self._parse( 1575 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1576 ) 1577 1578 def parse_into( 1579 self, 1580 expression_types: exp.IntoType, 1581 raw_tokens: t.List[Token], 1582 sql: t.Optional[str] = None, 1583 ) -> t.List[t.Optional[exp.Expression]]: 1584 """ 1585 Parses a list of tokens into a given Expression type. If a collection of Expression 1586 types is given instead, this method will try to parse the token list into each one 1587 of them, stopping at the first for which the parsing succeeds. 1588 1589 Args: 1590 expression_types: The expression type(s) to try and parse the token list into. 1591 raw_tokens: The list of tokens. 1592 sql: The original SQL string, used to produce helpful debug messages. 1593 1594 Returns: 1595 The target Expression. 1596 """ 1597 errors = [] 1598 for expression_type in ensure_list(expression_types): 1599 parser = self.EXPRESSION_PARSERS.get(expression_type) 1600 if not parser: 1601 raise TypeError(f"No parser registered for {expression_type}") 1602 1603 try: 1604 return self._parse(parser, raw_tokens, sql) 1605 except ParseError as e: 1606 e.errors[0]["into_expression"] = expression_type 1607 errors.append(e) 1608 1609 raise ParseError( 1610 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1611 errors=merge_errors(errors), 1612 ) from errors[-1] 1613 1614 def _parse( 1615 self, 1616 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1617 raw_tokens: t.List[Token], 1618 sql: t.Optional[str] = None, 1619 ) -> t.List[t.Optional[exp.Expression]]: 1620 self.reset() 1621 self.sql = sql or "" 1622 1623 total = len(raw_tokens) 1624 chunks: t.List[t.List[Token]] = [[]] 1625 1626 for i, token in enumerate(raw_tokens): 1627 if token.token_type == TokenType.SEMICOLON: 1628 if token.comments: 1629 chunks.append([token]) 1630 1631 if i < total - 1: 1632 chunks.append([]) 1633 else: 1634 chunks[-1].append(token) 1635 1636 expressions = [] 1637 1638 for tokens in chunks: 1639 self._index = -1 1640 self._tokens = tokens 1641 self._advance() 1642 1643 expressions.append(parse_method(self)) 1644 1645 if self._index < len(self._tokens): 1646 self.raise_error("Invalid expression / Unexpected token") 1647 1648 self.check_errors() 1649 1650 return expressions 1651 1652 def check_errors(self) -> None: 1653 """Logs or raises any found errors, depending on the chosen error level setting.""" 1654 if self.error_level == ErrorLevel.WARN: 1655 for error in self.errors: 1656 logger.error(str(error)) 1657 elif self.error_level == ErrorLevel.RAISE and self.errors: 1658 raise ParseError( 1659 concat_messages(self.errors, self.max_errors), 1660 errors=merge_errors(self.errors), 1661 ) 1662 1663 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1664 """ 1665 Appends an error in the list of recorded errors or raises it, depending on the chosen 1666 error level setting. 1667 """ 1668 token = token or self._curr or self._prev or Token.string("") 1669 start = token.start 1670 end = token.end + 1 1671 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1672 highlight = self.sql[start:end] 1673 end_context = self.sql[end : end + self.error_message_context] 1674 1675 error = ParseError.new( 1676 f"{message}. Line {token.line}, Col: {token.col}.\n" 1677 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1678 description=message, 1679 line=token.line, 1680 col=token.col, 1681 start_context=start_context, 1682 highlight=highlight, 1683 end_context=end_context, 1684 ) 1685 1686 if self.error_level == ErrorLevel.IMMEDIATE: 1687 raise error 1688 1689 self.errors.append(error) 1690 1691 def expression( 1692 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1693 ) -> E: 1694 """ 1695 Creates a new, validated Expression. 1696 1697 Args: 1698 exp_class: The expression class to instantiate. 1699 comments: An optional list of comments to attach to the expression. 1700 kwargs: The arguments to set for the expression along with their respective values. 1701 1702 Returns: 1703 The target expression. 1704 """ 1705 instance = exp_class(**kwargs) 1706 instance.add_comments(comments) if comments else self._add_comments(instance) 1707 return self.validate_expression(instance) 1708 1709 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1710 if expression and self._prev_comments: 1711 expression.add_comments(self._prev_comments) 1712 self._prev_comments = None 1713 1714 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1715 """ 1716 Validates an Expression, making sure that all its mandatory arguments are set. 1717 1718 Args: 1719 expression: The expression to validate. 1720 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1721 1722 Returns: 1723 The validated expression. 1724 """ 1725 if self.error_level != ErrorLevel.IGNORE: 1726 for error_message in expression.error_messages(args): 1727 self.raise_error(error_message) 1728 1729 return expression 1730 1731 def _find_sql(self, start: Token, end: Token) -> str: 1732 return self.sql[start.start : end.end + 1] 1733 1734 def _is_connected(self) -> bool: 1735 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1736 1737 def _advance(self, times: int = 1) -> None: 1738 self._index += times 1739 self._curr = seq_get(self._tokens, self._index) 1740 self._next = seq_get(self._tokens, self._index + 1) 1741 1742 if self._index > 0: 1743 self._prev = self._tokens[self._index - 1] 1744 self._prev_comments = self._prev.comments 1745 else: 1746 self._prev = None 1747 self._prev_comments = None 1748 1749 def _retreat(self, index: int) -> None: 1750 if index != self._index: 1751 self._advance(index - self._index) 1752 1753 def _warn_unsupported(self) -> None: 1754 if len(self._tokens) <= 1: 1755 return 1756 1757 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1758 # interested in emitting a warning for the one being currently processed. 1759 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1760 1761 logger.warning( 1762 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1763 ) 1764 1765 def _parse_command(self) -> exp.Command: 1766 self._warn_unsupported() 1767 return self.expression( 1768 exp.Command, 1769 comments=self._prev_comments, 1770 this=self._prev.text.upper(), 1771 expression=self._parse_string(), 1772 ) 1773 1774 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1775 """ 1776 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1777 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1778 solve this by setting & resetting the parser state accordingly 1779 """ 1780 index = self._index 1781 error_level = self.error_level 1782 1783 self.error_level = ErrorLevel.IMMEDIATE 1784 try: 1785 this = parse_method() 1786 except ParseError: 1787 this = None 1788 finally: 1789 if not this or retreat: 1790 self._retreat(index) 1791 self.error_level = error_level 1792 1793 return this 1794 1795 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1796 start = self._prev 1797 exists = self._parse_exists() if allow_exists else None 1798 1799 self._match(TokenType.ON) 1800 1801 materialized = self._match_text_seq("MATERIALIZED") 1802 kind = self._match_set(self.CREATABLES) and self._prev 1803 if not kind: 1804 return self._parse_as_command(start) 1805 1806 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1807 this = self._parse_user_defined_function(kind=kind.token_type) 1808 elif kind.token_type == TokenType.TABLE: 1809 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1810 elif kind.token_type == TokenType.COLUMN: 1811 this = self._parse_column() 1812 else: 1813 this = self._parse_id_var() 1814 1815 self._match(TokenType.IS) 1816 1817 return self.expression( 1818 exp.Comment, 1819 this=this, 1820 kind=kind.text, 1821 expression=self._parse_string(), 1822 exists=exists, 1823 materialized=materialized, 1824 ) 1825 1826 def _parse_to_table( 1827 self, 1828 ) -> exp.ToTableProperty: 1829 table = self._parse_table_parts(schema=True) 1830 return self.expression(exp.ToTableProperty, this=table) 1831 1832 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1833 def _parse_ttl(self) -> exp.Expression: 1834 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1835 this = self._parse_bitwise() 1836 1837 if self._match_text_seq("DELETE"): 1838 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1839 if self._match_text_seq("RECOMPRESS"): 1840 return self.expression( 1841 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1842 ) 1843 if self._match_text_seq("TO", "DISK"): 1844 return self.expression( 1845 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1846 ) 1847 if self._match_text_seq("TO", "VOLUME"): 1848 return self.expression( 1849 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1850 ) 1851 1852 return this 1853 1854 expressions = self._parse_csv(_parse_ttl_action) 1855 where = self._parse_where() 1856 group = self._parse_group() 1857 1858 aggregates = None 1859 if group and self._match(TokenType.SET): 1860 aggregates = self._parse_csv(self._parse_set_item) 1861 1862 return self.expression( 1863 exp.MergeTreeTTL, 1864 expressions=expressions, 1865 where=where, 1866 group=group, 1867 aggregates=aggregates, 1868 ) 1869 1870 def _parse_statement(self) -> t.Optional[exp.Expression]: 1871 if self._curr is None: 1872 return None 1873 1874 if self._match_set(self.STATEMENT_PARSERS): 1875 comments = self._prev_comments 1876 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1877 stmt.add_comments(comments, prepend=True) 1878 return stmt 1879 1880 if self._match_set(self.dialect.tokenizer.COMMANDS): 1881 return self._parse_command() 1882 1883 expression = self._parse_expression() 1884 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1885 return self._parse_query_modifiers(expression) 1886 1887 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1888 start = self._prev 1889 temporary = self._match(TokenType.TEMPORARY) 1890 materialized = self._match_text_seq("MATERIALIZED") 1891 1892 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1893 if not kind: 1894 return self._parse_as_command(start) 1895 1896 concurrently = self._match_text_seq("CONCURRENTLY") 1897 if_exists = exists or self._parse_exists() 1898 1899 if kind == "COLUMN": 1900 this = self._parse_column() 1901 else: 1902 this = self._parse_table_parts( 1903 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1904 ) 1905 1906 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1907 1908 if self._match(TokenType.L_PAREN, advance=False): 1909 expressions = self._parse_wrapped_csv(self._parse_types) 1910 else: 1911 expressions = None 1912 1913 return self.expression( 1914 exp.Drop, 1915 exists=if_exists, 1916 this=this, 1917 expressions=expressions, 1918 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1919 temporary=temporary, 1920 materialized=materialized, 1921 cascade=self._match_text_seq("CASCADE"), 1922 constraints=self._match_text_seq("CONSTRAINTS"), 1923 purge=self._match_text_seq("PURGE"), 1924 cluster=cluster, 1925 concurrently=concurrently, 1926 ) 1927 1928 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1929 return ( 1930 self._match_text_seq("IF") 1931 and (not not_ or self._match(TokenType.NOT)) 1932 and self._match(TokenType.EXISTS) 1933 ) 1934 1935 def _parse_create(self) -> exp.Create | exp.Command: 1936 # Note: this can't be None because we've matched a statement parser 1937 start = self._prev 1938 1939 replace = ( 1940 start.token_type == TokenType.REPLACE 1941 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1942 or self._match_pair(TokenType.OR, TokenType.ALTER) 1943 ) 1944 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1945 1946 unique = self._match(TokenType.UNIQUE) 1947 1948 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1949 clustered = True 1950 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1951 "COLUMNSTORE" 1952 ): 1953 clustered = False 1954 else: 1955 clustered = None 1956 1957 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1958 self._advance() 1959 1960 properties = None 1961 create_token = self._match_set(self.CREATABLES) and self._prev 1962 1963 if not create_token: 1964 # exp.Properties.Location.POST_CREATE 1965 properties = self._parse_properties() 1966 create_token = self._match_set(self.CREATABLES) and self._prev 1967 1968 if not properties or not create_token: 1969 return self._parse_as_command(start) 1970 1971 concurrently = self._match_text_seq("CONCURRENTLY") 1972 exists = self._parse_exists(not_=True) 1973 this = None 1974 expression: t.Optional[exp.Expression] = None 1975 indexes = None 1976 no_schema_binding = None 1977 begin = None 1978 end = None 1979 clone = None 1980 1981 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1982 nonlocal properties 1983 if properties and temp_props: 1984 properties.expressions.extend(temp_props.expressions) 1985 elif temp_props: 1986 properties = temp_props 1987 1988 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1989 this = self._parse_user_defined_function(kind=create_token.token_type) 1990 1991 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1992 extend_props(self._parse_properties()) 1993 1994 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1995 extend_props(self._parse_properties()) 1996 1997 if not expression: 1998 if self._match(TokenType.COMMAND): 1999 expression = self._parse_as_command(self._prev) 2000 else: 2001 begin = self._match(TokenType.BEGIN) 2002 return_ = self._match_text_seq("RETURN") 2003 2004 if self._match(TokenType.STRING, advance=False): 2005 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2006 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2007 expression = self._parse_string() 2008 extend_props(self._parse_properties()) 2009 else: 2010 expression = self._parse_user_defined_function_expression() 2011 2012 end = self._match_text_seq("END") 2013 2014 if return_: 2015 expression = self.expression(exp.Return, this=expression) 2016 elif create_token.token_type == TokenType.INDEX: 2017 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2018 if not self._match(TokenType.ON): 2019 index = self._parse_id_var() 2020 anonymous = False 2021 else: 2022 index = None 2023 anonymous = True 2024 2025 this = self._parse_index(index=index, anonymous=anonymous) 2026 elif create_token.token_type in self.DB_CREATABLES: 2027 table_parts = self._parse_table_parts( 2028 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2029 ) 2030 2031 # exp.Properties.Location.POST_NAME 2032 self._match(TokenType.COMMA) 2033 extend_props(self._parse_properties(before=True)) 2034 2035 this = self._parse_schema(this=table_parts) 2036 2037 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2038 extend_props(self._parse_properties()) 2039 2040 has_alias = self._match(TokenType.ALIAS) 2041 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2042 # exp.Properties.Location.POST_ALIAS 2043 extend_props(self._parse_properties()) 2044 2045 if create_token.token_type == TokenType.SEQUENCE: 2046 expression = self._parse_types() 2047 extend_props(self._parse_properties()) 2048 else: 2049 expression = self._parse_ddl_select() 2050 2051 # Some dialects also support using a table as an alias instead of a SELECT. 2052 # Here we fallback to this as an alternative. 2053 if not expression and has_alias: 2054 expression = self._try_parse(self._parse_table_parts) 2055 2056 if create_token.token_type == TokenType.TABLE: 2057 # exp.Properties.Location.POST_EXPRESSION 2058 extend_props(self._parse_properties()) 2059 2060 indexes = [] 2061 while True: 2062 index = self._parse_index() 2063 2064 # exp.Properties.Location.POST_INDEX 2065 extend_props(self._parse_properties()) 2066 if not index: 2067 break 2068 else: 2069 self._match(TokenType.COMMA) 2070 indexes.append(index) 2071 elif create_token.token_type == TokenType.VIEW: 2072 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2073 no_schema_binding = True 2074 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2075 extend_props(self._parse_properties()) 2076 2077 shallow = self._match_text_seq("SHALLOW") 2078 2079 if self._match_texts(self.CLONE_KEYWORDS): 2080 copy = self._prev.text.lower() == "copy" 2081 clone = self.expression( 2082 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2083 ) 2084 2085 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2086 return self._parse_as_command(start) 2087 2088 create_kind_text = create_token.text.upper() 2089 return self.expression( 2090 exp.Create, 2091 this=this, 2092 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2093 replace=replace, 2094 refresh=refresh, 2095 unique=unique, 2096 expression=expression, 2097 exists=exists, 2098 properties=properties, 2099 indexes=indexes, 2100 no_schema_binding=no_schema_binding, 2101 begin=begin, 2102 end=end, 2103 clone=clone, 2104 concurrently=concurrently, 2105 clustered=clustered, 2106 ) 2107 2108 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2109 seq = exp.SequenceProperties() 2110 2111 options = [] 2112 index = self._index 2113 2114 while self._curr: 2115 self._match(TokenType.COMMA) 2116 if self._match_text_seq("INCREMENT"): 2117 self._match_text_seq("BY") 2118 self._match_text_seq("=") 2119 seq.set("increment", self._parse_term()) 2120 elif self._match_text_seq("MINVALUE"): 2121 seq.set("minvalue", self._parse_term()) 2122 elif self._match_text_seq("MAXVALUE"): 2123 seq.set("maxvalue", self._parse_term()) 2124 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2125 self._match_text_seq("=") 2126 seq.set("start", self._parse_term()) 2127 elif self._match_text_seq("CACHE"): 2128 # T-SQL allows empty CACHE which is initialized dynamically 2129 seq.set("cache", self._parse_number() or True) 2130 elif self._match_text_seq("OWNED", "BY"): 2131 # "OWNED BY NONE" is the default 2132 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2133 else: 2134 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2135 if opt: 2136 options.append(opt) 2137 else: 2138 break 2139 2140 seq.set("options", options if options else None) 2141 return None if self._index == index else seq 2142 2143 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2144 # only used for teradata currently 2145 self._match(TokenType.COMMA) 2146 2147 kwargs = { 2148 "no": self._match_text_seq("NO"), 2149 "dual": self._match_text_seq("DUAL"), 2150 "before": self._match_text_seq("BEFORE"), 2151 "default": self._match_text_seq("DEFAULT"), 2152 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2153 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2154 "after": self._match_text_seq("AFTER"), 2155 "minimum": self._match_texts(("MIN", "MINIMUM")), 2156 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2157 } 2158 2159 if self._match_texts(self.PROPERTY_PARSERS): 2160 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2161 try: 2162 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2163 except TypeError: 2164 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2165 2166 return None 2167 2168 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2169 return self._parse_wrapped_csv(self._parse_property) 2170 2171 def _parse_property(self) -> t.Optional[exp.Expression]: 2172 if self._match_texts(self.PROPERTY_PARSERS): 2173 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2174 2175 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2176 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2177 2178 if self._match_text_seq("COMPOUND", "SORTKEY"): 2179 return self._parse_sortkey(compound=True) 2180 2181 if self._match_text_seq("SQL", "SECURITY"): 2182 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2183 2184 index = self._index 2185 key = self._parse_column() 2186 2187 if not self._match(TokenType.EQ): 2188 self._retreat(index) 2189 return self._parse_sequence_properties() 2190 2191 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2192 if isinstance(key, exp.Column): 2193 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2194 2195 value = self._parse_bitwise() or self._parse_var(any_token=True) 2196 2197 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2198 if isinstance(value, exp.Column): 2199 value = exp.var(value.name) 2200 2201 return self.expression(exp.Property, this=key, value=value) 2202 2203 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2204 if self._match_text_seq("BY"): 2205 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2206 2207 self._match(TokenType.ALIAS) 2208 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2209 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2210 2211 return self.expression( 2212 exp.FileFormatProperty, 2213 this=( 2214 self.expression( 2215 exp.InputOutputFormat, 2216 input_format=input_format, 2217 output_format=output_format, 2218 ) 2219 if input_format or output_format 2220 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2221 ), 2222 ) 2223 2224 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2225 field = self._parse_field() 2226 if isinstance(field, exp.Identifier) and not field.quoted: 2227 field = exp.var(field) 2228 2229 return field 2230 2231 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2232 self._match(TokenType.EQ) 2233 self._match(TokenType.ALIAS) 2234 2235 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2236 2237 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2238 properties = [] 2239 while True: 2240 if before: 2241 prop = self._parse_property_before() 2242 else: 2243 prop = self._parse_property() 2244 if not prop: 2245 break 2246 for p in ensure_list(prop): 2247 properties.append(p) 2248 2249 if properties: 2250 return self.expression(exp.Properties, expressions=properties) 2251 2252 return None 2253 2254 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2255 return self.expression( 2256 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2257 ) 2258 2259 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2260 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2261 security_specifier = self._prev.text.upper() 2262 return self.expression(exp.SecurityProperty, this=security_specifier) 2263 return None 2264 2265 def _parse_settings_property(self) -> exp.SettingsProperty: 2266 return self.expression( 2267 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2268 ) 2269 2270 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2271 if self._index >= 2: 2272 pre_volatile_token = self._tokens[self._index - 2] 2273 else: 2274 pre_volatile_token = None 2275 2276 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2277 return exp.VolatileProperty() 2278 2279 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2280 2281 def _parse_retention_period(self) -> exp.Var: 2282 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2283 number = self._parse_number() 2284 number_str = f"{number} " if number else "" 2285 unit = self._parse_var(any_token=True) 2286 return exp.var(f"{number_str}{unit}") 2287 2288 def _parse_system_versioning_property( 2289 self, with_: bool = False 2290 ) -> exp.WithSystemVersioningProperty: 2291 self._match(TokenType.EQ) 2292 prop = self.expression( 2293 exp.WithSystemVersioningProperty, 2294 **{ # type: ignore 2295 "on": True, 2296 "with": with_, 2297 }, 2298 ) 2299 2300 if self._match_text_seq("OFF"): 2301 prop.set("on", False) 2302 return prop 2303 2304 self._match(TokenType.ON) 2305 if self._match(TokenType.L_PAREN): 2306 while self._curr and not self._match(TokenType.R_PAREN): 2307 if self._match_text_seq("HISTORY_TABLE", "="): 2308 prop.set("this", self._parse_table_parts()) 2309 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2310 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2311 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2312 prop.set("retention_period", self._parse_retention_period()) 2313 2314 self._match(TokenType.COMMA) 2315 2316 return prop 2317 2318 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2319 self._match(TokenType.EQ) 2320 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2321 prop = self.expression(exp.DataDeletionProperty, on=on) 2322 2323 if self._match(TokenType.L_PAREN): 2324 while self._curr and not self._match(TokenType.R_PAREN): 2325 if self._match_text_seq("FILTER_COLUMN", "="): 2326 prop.set("filter_column", self._parse_column()) 2327 elif self._match_text_seq("RETENTION_PERIOD", "="): 2328 prop.set("retention_period", self._parse_retention_period()) 2329 2330 self._match(TokenType.COMMA) 2331 2332 return prop 2333 2334 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2335 kind = "HASH" 2336 expressions: t.Optional[t.List[exp.Expression]] = None 2337 if self._match_text_seq("BY", "HASH"): 2338 expressions = self._parse_wrapped_csv(self._parse_id_var) 2339 elif self._match_text_seq("BY", "RANDOM"): 2340 kind = "RANDOM" 2341 2342 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2343 buckets: t.Optional[exp.Expression] = None 2344 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2345 buckets = self._parse_number() 2346 2347 return self.expression( 2348 exp.DistributedByProperty, 2349 expressions=expressions, 2350 kind=kind, 2351 buckets=buckets, 2352 order=self._parse_order(), 2353 ) 2354 2355 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2356 self._match_text_seq("KEY") 2357 expressions = self._parse_wrapped_id_vars() 2358 return self.expression(expr_type, expressions=expressions) 2359 2360 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2361 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2362 prop = self._parse_system_versioning_property(with_=True) 2363 self._match_r_paren() 2364 return prop 2365 2366 if self._match(TokenType.L_PAREN, advance=False): 2367 return self._parse_wrapped_properties() 2368 2369 if self._match_text_seq("JOURNAL"): 2370 return self._parse_withjournaltable() 2371 2372 if self._match_texts(self.VIEW_ATTRIBUTES): 2373 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2374 2375 if self._match_text_seq("DATA"): 2376 return self._parse_withdata(no=False) 2377 elif self._match_text_seq("NO", "DATA"): 2378 return self._parse_withdata(no=True) 2379 2380 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2381 return self._parse_serde_properties(with_=True) 2382 2383 if self._match(TokenType.SCHEMA): 2384 return self.expression( 2385 exp.WithSchemaBindingProperty, 2386 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2387 ) 2388 2389 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2390 return self.expression( 2391 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2392 ) 2393 2394 if not self._next: 2395 return None 2396 2397 return self._parse_withisolatedloading() 2398 2399 def _parse_procedure_option(self) -> exp.Expression | None: 2400 if self._match_text_seq("EXECUTE", "AS"): 2401 return self.expression( 2402 exp.ExecuteAsProperty, 2403 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2404 or self._parse_string(), 2405 ) 2406 2407 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2408 2409 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2410 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2411 self._match(TokenType.EQ) 2412 2413 user = self._parse_id_var() 2414 self._match(TokenType.PARAMETER) 2415 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2416 2417 if not user or not host: 2418 return None 2419 2420 return exp.DefinerProperty(this=f"{user}@{host}") 2421 2422 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2423 self._match(TokenType.TABLE) 2424 self._match(TokenType.EQ) 2425 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2426 2427 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2428 return self.expression(exp.LogProperty, no=no) 2429 2430 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2431 return self.expression(exp.JournalProperty, **kwargs) 2432 2433 def _parse_checksum(self) -> exp.ChecksumProperty: 2434 self._match(TokenType.EQ) 2435 2436 on = None 2437 if self._match(TokenType.ON): 2438 on = True 2439 elif self._match_text_seq("OFF"): 2440 on = False 2441 2442 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2443 2444 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2445 return self.expression( 2446 exp.Cluster, 2447 expressions=( 2448 self._parse_wrapped_csv(self._parse_ordered) 2449 if wrapped 2450 else self._parse_csv(self._parse_ordered) 2451 ), 2452 ) 2453 2454 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2455 self._match_text_seq("BY") 2456 2457 self._match_l_paren() 2458 expressions = self._parse_csv(self._parse_column) 2459 self._match_r_paren() 2460 2461 if self._match_text_seq("SORTED", "BY"): 2462 self._match_l_paren() 2463 sorted_by = self._parse_csv(self._parse_ordered) 2464 self._match_r_paren() 2465 else: 2466 sorted_by = None 2467 2468 self._match(TokenType.INTO) 2469 buckets = self._parse_number() 2470 self._match_text_seq("BUCKETS") 2471 2472 return self.expression( 2473 exp.ClusteredByProperty, 2474 expressions=expressions, 2475 sorted_by=sorted_by, 2476 buckets=buckets, 2477 ) 2478 2479 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2480 if not self._match_text_seq("GRANTS"): 2481 self._retreat(self._index - 1) 2482 return None 2483 2484 return self.expression(exp.CopyGrantsProperty) 2485 2486 def _parse_freespace(self) -> exp.FreespaceProperty: 2487 self._match(TokenType.EQ) 2488 return self.expression( 2489 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2490 ) 2491 2492 def _parse_mergeblockratio( 2493 self, no: bool = False, default: bool = False 2494 ) -> exp.MergeBlockRatioProperty: 2495 if self._match(TokenType.EQ): 2496 return self.expression( 2497 exp.MergeBlockRatioProperty, 2498 this=self._parse_number(), 2499 percent=self._match(TokenType.PERCENT), 2500 ) 2501 2502 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2503 2504 def _parse_datablocksize( 2505 self, 2506 default: t.Optional[bool] = None, 2507 minimum: t.Optional[bool] = None, 2508 maximum: t.Optional[bool] = None, 2509 ) -> exp.DataBlocksizeProperty: 2510 self._match(TokenType.EQ) 2511 size = self._parse_number() 2512 2513 units = None 2514 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2515 units = self._prev.text 2516 2517 return self.expression( 2518 exp.DataBlocksizeProperty, 2519 size=size, 2520 units=units, 2521 default=default, 2522 minimum=minimum, 2523 maximum=maximum, 2524 ) 2525 2526 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2527 self._match(TokenType.EQ) 2528 always = self._match_text_seq("ALWAYS") 2529 manual = self._match_text_seq("MANUAL") 2530 never = self._match_text_seq("NEVER") 2531 default = self._match_text_seq("DEFAULT") 2532 2533 autotemp = None 2534 if self._match_text_seq("AUTOTEMP"): 2535 autotemp = self._parse_schema() 2536 2537 return self.expression( 2538 exp.BlockCompressionProperty, 2539 always=always, 2540 manual=manual, 2541 never=never, 2542 default=default, 2543 autotemp=autotemp, 2544 ) 2545 2546 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2547 index = self._index 2548 no = self._match_text_seq("NO") 2549 concurrent = self._match_text_seq("CONCURRENT") 2550 2551 if not self._match_text_seq("ISOLATED", "LOADING"): 2552 self._retreat(index) 2553 return None 2554 2555 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2556 return self.expression( 2557 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2558 ) 2559 2560 def _parse_locking(self) -> exp.LockingProperty: 2561 if self._match(TokenType.TABLE): 2562 kind = "TABLE" 2563 elif self._match(TokenType.VIEW): 2564 kind = "VIEW" 2565 elif self._match(TokenType.ROW): 2566 kind = "ROW" 2567 elif self._match_text_seq("DATABASE"): 2568 kind = "DATABASE" 2569 else: 2570 kind = None 2571 2572 if kind in ("DATABASE", "TABLE", "VIEW"): 2573 this = self._parse_table_parts() 2574 else: 2575 this = None 2576 2577 if self._match(TokenType.FOR): 2578 for_or_in = "FOR" 2579 elif self._match(TokenType.IN): 2580 for_or_in = "IN" 2581 else: 2582 for_or_in = None 2583 2584 if self._match_text_seq("ACCESS"): 2585 lock_type = "ACCESS" 2586 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2587 lock_type = "EXCLUSIVE" 2588 elif self._match_text_seq("SHARE"): 2589 lock_type = "SHARE" 2590 elif self._match_text_seq("READ"): 2591 lock_type = "READ" 2592 elif self._match_text_seq("WRITE"): 2593 lock_type = "WRITE" 2594 elif self._match_text_seq("CHECKSUM"): 2595 lock_type = "CHECKSUM" 2596 else: 2597 lock_type = None 2598 2599 override = self._match_text_seq("OVERRIDE") 2600 2601 return self.expression( 2602 exp.LockingProperty, 2603 this=this, 2604 kind=kind, 2605 for_or_in=for_or_in, 2606 lock_type=lock_type, 2607 override=override, 2608 ) 2609 2610 def _parse_partition_by(self) -> t.List[exp.Expression]: 2611 if self._match(TokenType.PARTITION_BY): 2612 return self._parse_csv(self._parse_assignment) 2613 return [] 2614 2615 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2616 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2617 if self._match_text_seq("MINVALUE"): 2618 return exp.var("MINVALUE") 2619 if self._match_text_seq("MAXVALUE"): 2620 return exp.var("MAXVALUE") 2621 return self._parse_bitwise() 2622 2623 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2624 expression = None 2625 from_expressions = None 2626 to_expressions = None 2627 2628 if self._match(TokenType.IN): 2629 this = self._parse_wrapped_csv(self._parse_bitwise) 2630 elif self._match(TokenType.FROM): 2631 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2632 self._match_text_seq("TO") 2633 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2634 elif self._match_text_seq("WITH", "(", "MODULUS"): 2635 this = self._parse_number() 2636 self._match_text_seq(",", "REMAINDER") 2637 expression = self._parse_number() 2638 self._match_r_paren() 2639 else: 2640 self.raise_error("Failed to parse partition bound spec.") 2641 2642 return self.expression( 2643 exp.PartitionBoundSpec, 2644 this=this, 2645 expression=expression, 2646 from_expressions=from_expressions, 2647 to_expressions=to_expressions, 2648 ) 2649 2650 # https://www.postgresql.org/docs/current/sql-createtable.html 2651 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2652 if not self._match_text_seq("OF"): 2653 self._retreat(self._index - 1) 2654 return None 2655 2656 this = self._parse_table(schema=True) 2657 2658 if self._match(TokenType.DEFAULT): 2659 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2660 elif self._match_text_seq("FOR", "VALUES"): 2661 expression = self._parse_partition_bound_spec() 2662 else: 2663 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2664 2665 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2666 2667 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2668 self._match(TokenType.EQ) 2669 return self.expression( 2670 exp.PartitionedByProperty, 2671 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2672 ) 2673 2674 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2675 if self._match_text_seq("AND", "STATISTICS"): 2676 statistics = True 2677 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2678 statistics = False 2679 else: 2680 statistics = None 2681 2682 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2683 2684 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2685 if self._match_text_seq("SQL"): 2686 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2687 return None 2688 2689 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2690 if self._match_text_seq("SQL", "DATA"): 2691 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2692 return None 2693 2694 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2695 if self._match_text_seq("PRIMARY", "INDEX"): 2696 return exp.NoPrimaryIndexProperty() 2697 if self._match_text_seq("SQL"): 2698 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2699 return None 2700 2701 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2702 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2703 return exp.OnCommitProperty() 2704 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2705 return exp.OnCommitProperty(delete=True) 2706 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2707 2708 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2709 if self._match_text_seq("SQL", "DATA"): 2710 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2711 return None 2712 2713 def _parse_distkey(self) -> exp.DistKeyProperty: 2714 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2715 2716 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2717 table = self._parse_table(schema=True) 2718 2719 options = [] 2720 while self._match_texts(("INCLUDING", "EXCLUDING")): 2721 this = self._prev.text.upper() 2722 2723 id_var = self._parse_id_var() 2724 if not id_var: 2725 return None 2726 2727 options.append( 2728 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2729 ) 2730 2731 return self.expression(exp.LikeProperty, this=table, expressions=options) 2732 2733 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2734 return self.expression( 2735 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2736 ) 2737 2738 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2739 self._match(TokenType.EQ) 2740 return self.expression( 2741 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2742 ) 2743 2744 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2745 self._match_text_seq("WITH", "CONNECTION") 2746 return self.expression( 2747 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2748 ) 2749 2750 def _parse_returns(self) -> exp.ReturnsProperty: 2751 value: t.Optional[exp.Expression] 2752 null = None 2753 is_table = self._match(TokenType.TABLE) 2754 2755 if is_table: 2756 if self._match(TokenType.LT): 2757 value = self.expression( 2758 exp.Schema, 2759 this="TABLE", 2760 expressions=self._parse_csv(self._parse_struct_types), 2761 ) 2762 if not self._match(TokenType.GT): 2763 self.raise_error("Expecting >") 2764 else: 2765 value = self._parse_schema(exp.var("TABLE")) 2766 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2767 null = True 2768 value = None 2769 else: 2770 value = self._parse_types() 2771 2772 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2773 2774 def _parse_describe(self) -> exp.Describe: 2775 kind = self._match_set(self.CREATABLES) and self._prev.text 2776 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2777 if self._match(TokenType.DOT): 2778 style = None 2779 self._retreat(self._index - 2) 2780 2781 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2782 2783 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2784 this = self._parse_statement() 2785 else: 2786 this = self._parse_table(schema=True) 2787 2788 properties = self._parse_properties() 2789 expressions = properties.expressions if properties else None 2790 partition = self._parse_partition() 2791 return self.expression( 2792 exp.Describe, 2793 this=this, 2794 style=style, 2795 kind=kind, 2796 expressions=expressions, 2797 partition=partition, 2798 format=format, 2799 ) 2800 2801 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2802 kind = self._prev.text.upper() 2803 expressions = [] 2804 2805 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2806 if self._match(TokenType.WHEN): 2807 expression = self._parse_disjunction() 2808 self._match(TokenType.THEN) 2809 else: 2810 expression = None 2811 2812 else_ = self._match(TokenType.ELSE) 2813 2814 if not self._match(TokenType.INTO): 2815 return None 2816 2817 return self.expression( 2818 exp.ConditionalInsert, 2819 this=self.expression( 2820 exp.Insert, 2821 this=self._parse_table(schema=True), 2822 expression=self._parse_derived_table_values(), 2823 ), 2824 expression=expression, 2825 else_=else_, 2826 ) 2827 2828 expression = parse_conditional_insert() 2829 while expression is not None: 2830 expressions.append(expression) 2831 expression = parse_conditional_insert() 2832 2833 return self.expression( 2834 exp.MultitableInserts, 2835 kind=kind, 2836 comments=comments, 2837 expressions=expressions, 2838 source=self._parse_table(), 2839 ) 2840 2841 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2842 comments = [] 2843 hint = self._parse_hint() 2844 overwrite = self._match(TokenType.OVERWRITE) 2845 ignore = self._match(TokenType.IGNORE) 2846 local = self._match_text_seq("LOCAL") 2847 alternative = None 2848 is_function = None 2849 2850 if self._match_text_seq("DIRECTORY"): 2851 this: t.Optional[exp.Expression] = self.expression( 2852 exp.Directory, 2853 this=self._parse_var_or_string(), 2854 local=local, 2855 row_format=self._parse_row_format(match_row=True), 2856 ) 2857 else: 2858 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2859 comments += ensure_list(self._prev_comments) 2860 return self._parse_multitable_inserts(comments) 2861 2862 if self._match(TokenType.OR): 2863 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2864 2865 self._match(TokenType.INTO) 2866 comments += ensure_list(self._prev_comments) 2867 self._match(TokenType.TABLE) 2868 is_function = self._match(TokenType.FUNCTION) 2869 2870 this = ( 2871 self._parse_table(schema=True, parse_partition=True) 2872 if not is_function 2873 else self._parse_function() 2874 ) 2875 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2876 this.set("alias", self._parse_table_alias()) 2877 2878 returning = self._parse_returning() 2879 2880 return self.expression( 2881 exp.Insert, 2882 comments=comments, 2883 hint=hint, 2884 is_function=is_function, 2885 this=this, 2886 stored=self._match_text_seq("STORED") and self._parse_stored(), 2887 by_name=self._match_text_seq("BY", "NAME"), 2888 exists=self._parse_exists(), 2889 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2890 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2891 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2892 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2893 conflict=self._parse_on_conflict(), 2894 returning=returning or self._parse_returning(), 2895 overwrite=overwrite, 2896 alternative=alternative, 2897 ignore=ignore, 2898 source=self._match(TokenType.TABLE) and self._parse_table(), 2899 ) 2900 2901 def _parse_kill(self) -> exp.Kill: 2902 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2903 2904 return self.expression( 2905 exp.Kill, 2906 this=self._parse_primary(), 2907 kind=kind, 2908 ) 2909 2910 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2911 conflict = self._match_text_seq("ON", "CONFLICT") 2912 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2913 2914 if not conflict and not duplicate: 2915 return None 2916 2917 conflict_keys = None 2918 constraint = None 2919 2920 if conflict: 2921 if self._match_text_seq("ON", "CONSTRAINT"): 2922 constraint = self._parse_id_var() 2923 elif self._match(TokenType.L_PAREN): 2924 conflict_keys = self._parse_csv(self._parse_id_var) 2925 self._match_r_paren() 2926 2927 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2928 if self._prev.token_type == TokenType.UPDATE: 2929 self._match(TokenType.SET) 2930 expressions = self._parse_csv(self._parse_equality) 2931 else: 2932 expressions = None 2933 2934 return self.expression( 2935 exp.OnConflict, 2936 duplicate=duplicate, 2937 expressions=expressions, 2938 action=action, 2939 conflict_keys=conflict_keys, 2940 constraint=constraint, 2941 where=self._parse_where(), 2942 ) 2943 2944 def _parse_returning(self) -> t.Optional[exp.Returning]: 2945 if not self._match(TokenType.RETURNING): 2946 return None 2947 return self.expression( 2948 exp.Returning, 2949 expressions=self._parse_csv(self._parse_expression), 2950 into=self._match(TokenType.INTO) and self._parse_table_part(), 2951 ) 2952 2953 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2954 if not self._match(TokenType.FORMAT): 2955 return None 2956 return self._parse_row_format() 2957 2958 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2959 index = self._index 2960 with_ = with_ or self._match_text_seq("WITH") 2961 2962 if not self._match(TokenType.SERDE_PROPERTIES): 2963 self._retreat(index) 2964 return None 2965 return self.expression( 2966 exp.SerdeProperties, 2967 **{ # type: ignore 2968 "expressions": self._parse_wrapped_properties(), 2969 "with": with_, 2970 }, 2971 ) 2972 2973 def _parse_row_format( 2974 self, match_row: bool = False 2975 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2976 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2977 return None 2978 2979 if self._match_text_seq("SERDE"): 2980 this = self._parse_string() 2981 2982 serde_properties = self._parse_serde_properties() 2983 2984 return self.expression( 2985 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2986 ) 2987 2988 self._match_text_seq("DELIMITED") 2989 2990 kwargs = {} 2991 2992 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2993 kwargs["fields"] = self._parse_string() 2994 if self._match_text_seq("ESCAPED", "BY"): 2995 kwargs["escaped"] = self._parse_string() 2996 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2997 kwargs["collection_items"] = self._parse_string() 2998 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2999 kwargs["map_keys"] = self._parse_string() 3000 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3001 kwargs["lines"] = self._parse_string() 3002 if self._match_text_seq("NULL", "DEFINED", "AS"): 3003 kwargs["null"] = self._parse_string() 3004 3005 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3006 3007 def _parse_load(self) -> exp.LoadData | exp.Command: 3008 if self._match_text_seq("DATA"): 3009 local = self._match_text_seq("LOCAL") 3010 self._match_text_seq("INPATH") 3011 inpath = self._parse_string() 3012 overwrite = self._match(TokenType.OVERWRITE) 3013 self._match_pair(TokenType.INTO, TokenType.TABLE) 3014 3015 return self.expression( 3016 exp.LoadData, 3017 this=self._parse_table(schema=True), 3018 local=local, 3019 overwrite=overwrite, 3020 inpath=inpath, 3021 partition=self._parse_partition(), 3022 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3023 serde=self._match_text_seq("SERDE") and self._parse_string(), 3024 ) 3025 return self._parse_as_command(self._prev) 3026 3027 def _parse_delete(self) -> exp.Delete: 3028 # This handles MySQL's "Multiple-Table Syntax" 3029 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3030 tables = None 3031 if not self._match(TokenType.FROM, advance=False): 3032 tables = self._parse_csv(self._parse_table) or None 3033 3034 returning = self._parse_returning() 3035 3036 return self.expression( 3037 exp.Delete, 3038 tables=tables, 3039 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3040 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3041 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3042 where=self._parse_where(), 3043 returning=returning or self._parse_returning(), 3044 limit=self._parse_limit(), 3045 ) 3046 3047 def _parse_update(self) -> exp.Update: 3048 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3049 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3050 returning = self._parse_returning() 3051 return self.expression( 3052 exp.Update, 3053 **{ # type: ignore 3054 "this": this, 3055 "expressions": expressions, 3056 "from": self._parse_from(joins=True), 3057 "where": self._parse_where(), 3058 "returning": returning or self._parse_returning(), 3059 "order": self._parse_order(), 3060 "limit": self._parse_limit(), 3061 }, 3062 ) 3063 3064 def _parse_use(self) -> exp.Use: 3065 return self.expression( 3066 exp.Use, 3067 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3068 this=self._parse_table(schema=False), 3069 ) 3070 3071 def _parse_uncache(self) -> exp.Uncache: 3072 if not self._match(TokenType.TABLE): 3073 self.raise_error("Expecting TABLE after UNCACHE") 3074 3075 return self.expression( 3076 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3077 ) 3078 3079 def _parse_cache(self) -> exp.Cache: 3080 lazy = self._match_text_seq("LAZY") 3081 self._match(TokenType.TABLE) 3082 table = self._parse_table(schema=True) 3083 3084 options = [] 3085 if self._match_text_seq("OPTIONS"): 3086 self._match_l_paren() 3087 k = self._parse_string() 3088 self._match(TokenType.EQ) 3089 v = self._parse_string() 3090 options = [k, v] 3091 self._match_r_paren() 3092 3093 self._match(TokenType.ALIAS) 3094 return self.expression( 3095 exp.Cache, 3096 this=table, 3097 lazy=lazy, 3098 options=options, 3099 expression=self._parse_select(nested=True), 3100 ) 3101 3102 def _parse_partition(self) -> t.Optional[exp.Partition]: 3103 if not self._match_texts(self.PARTITION_KEYWORDS): 3104 return None 3105 3106 return self.expression( 3107 exp.Partition, 3108 subpartition=self._prev.text.upper() == "SUBPARTITION", 3109 expressions=self._parse_wrapped_csv(self._parse_assignment), 3110 ) 3111 3112 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3113 def _parse_value_expression() -> t.Optional[exp.Expression]: 3114 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3115 return exp.var(self._prev.text.upper()) 3116 return self._parse_expression() 3117 3118 if self._match(TokenType.L_PAREN): 3119 expressions = self._parse_csv(_parse_value_expression) 3120 self._match_r_paren() 3121 return self.expression(exp.Tuple, expressions=expressions) 3122 3123 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3124 expression = self._parse_expression() 3125 if expression: 3126 return self.expression(exp.Tuple, expressions=[expression]) 3127 return None 3128 3129 def _parse_projections(self) -> t.List[exp.Expression]: 3130 return self._parse_expressions() 3131 3132 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3133 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3134 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3135 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3136 ) 3137 elif self._match(TokenType.FROM): 3138 from_ = self._parse_from(skip_from_token=True) 3139 # Support parentheses for duckdb FROM-first syntax 3140 select = self._parse_select() 3141 if select: 3142 select.set("from", from_) 3143 this = select 3144 else: 3145 this = exp.select("*").from_(t.cast(exp.From, from_)) 3146 else: 3147 this = ( 3148 self._parse_table() 3149 if table 3150 else self._parse_select(nested=True, parse_set_operation=False) 3151 ) 3152 3153 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3154 # in case a modifier (e.g. join) is following 3155 if table and isinstance(this, exp.Values) and this.alias: 3156 alias = this.args["alias"].pop() 3157 this = exp.Table(this=this, alias=alias) 3158 3159 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3160 3161 return this 3162 3163 def _parse_select( 3164 self, 3165 nested: bool = False, 3166 table: bool = False, 3167 parse_subquery_alias: bool = True, 3168 parse_set_operation: bool = True, 3169 ) -> t.Optional[exp.Expression]: 3170 cte = self._parse_with() 3171 3172 if cte: 3173 this = self._parse_statement() 3174 3175 if not this: 3176 self.raise_error("Failed to parse any statement following CTE") 3177 return cte 3178 3179 if "with" in this.arg_types: 3180 this.set("with", cte) 3181 else: 3182 self.raise_error(f"{this.key} does not support CTE") 3183 this = cte 3184 3185 return this 3186 3187 # duckdb supports leading with FROM x 3188 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3189 3190 if self._match(TokenType.SELECT): 3191 comments = self._prev_comments 3192 3193 hint = self._parse_hint() 3194 3195 if self._next and not self._next.token_type == TokenType.DOT: 3196 all_ = self._match(TokenType.ALL) 3197 distinct = self._match_set(self.DISTINCT_TOKENS) 3198 else: 3199 all_, distinct = None, None 3200 3201 kind = ( 3202 self._match(TokenType.ALIAS) 3203 and self._match_texts(("STRUCT", "VALUE")) 3204 and self._prev.text.upper() 3205 ) 3206 3207 if distinct: 3208 distinct = self.expression( 3209 exp.Distinct, 3210 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3211 ) 3212 3213 if all_ and distinct: 3214 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3215 3216 operation_modifiers = [] 3217 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3218 operation_modifiers.append(exp.var(self._prev.text.upper())) 3219 3220 limit = self._parse_limit(top=True) 3221 projections = self._parse_projections() 3222 3223 this = self.expression( 3224 exp.Select, 3225 kind=kind, 3226 hint=hint, 3227 distinct=distinct, 3228 expressions=projections, 3229 limit=limit, 3230 operation_modifiers=operation_modifiers or None, 3231 ) 3232 this.comments = comments 3233 3234 into = self._parse_into() 3235 if into: 3236 this.set("into", into) 3237 3238 if not from_: 3239 from_ = self._parse_from() 3240 3241 if from_: 3242 this.set("from", from_) 3243 3244 this = self._parse_query_modifiers(this) 3245 elif (table or nested) and self._match(TokenType.L_PAREN): 3246 this = self._parse_wrapped_select(table=table) 3247 3248 # We return early here so that the UNION isn't attached to the subquery by the 3249 # following call to _parse_set_operations, but instead becomes the parent node 3250 self._match_r_paren() 3251 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3252 elif self._match(TokenType.VALUES, advance=False): 3253 this = self._parse_derived_table_values() 3254 elif from_: 3255 if self._match(TokenType.PIPE_GT, advance=False): 3256 return self._parse_pipe_syntax_query( 3257 exp.Select().from_(from_.this, append=False, copy=False) 3258 ) 3259 this = exp.select("*").from_(from_.this, copy=False) 3260 elif self._match(TokenType.SUMMARIZE): 3261 table = self._match(TokenType.TABLE) 3262 this = self._parse_select() or self._parse_string() or self._parse_table() 3263 return self.expression(exp.Summarize, this=this, table=table) 3264 elif self._match(TokenType.DESCRIBE): 3265 this = self._parse_describe() 3266 elif self._match_text_seq("STREAM"): 3267 this = self._parse_function() 3268 if this: 3269 this = self.expression(exp.Stream, this=this) 3270 else: 3271 self._retreat(self._index - 1) 3272 else: 3273 this = None 3274 3275 return self._parse_set_operations(this) if parse_set_operation else this 3276 3277 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3278 self._match_text_seq("SEARCH") 3279 3280 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3281 3282 if not kind: 3283 return None 3284 3285 self._match_text_seq("FIRST", "BY") 3286 3287 return self.expression( 3288 exp.RecursiveWithSearch, 3289 kind=kind, 3290 this=self._parse_id_var(), 3291 expression=self._match_text_seq("SET") and self._parse_id_var(), 3292 using=self._match_text_seq("USING") and self._parse_id_var(), 3293 ) 3294 3295 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3296 if not skip_with_token and not self._match(TokenType.WITH): 3297 return None 3298 3299 comments = self._prev_comments 3300 recursive = self._match(TokenType.RECURSIVE) 3301 3302 last_comments = None 3303 expressions = [] 3304 while True: 3305 cte = self._parse_cte() 3306 if isinstance(cte, exp.CTE): 3307 expressions.append(cte) 3308 if last_comments: 3309 cte.add_comments(last_comments) 3310 3311 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3312 break 3313 else: 3314 self._match(TokenType.WITH) 3315 3316 last_comments = self._prev_comments 3317 3318 return self.expression( 3319 exp.With, 3320 comments=comments, 3321 expressions=expressions, 3322 recursive=recursive, 3323 search=self._parse_recursive_with_search(), 3324 ) 3325 3326 def _parse_cte(self) -> t.Optional[exp.CTE]: 3327 index = self._index 3328 3329 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3330 if not alias or not alias.this: 3331 self.raise_error("Expected CTE to have alias") 3332 3333 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3334 self._retreat(index) 3335 return None 3336 3337 comments = self._prev_comments 3338 3339 if self._match_text_seq("NOT", "MATERIALIZED"): 3340 materialized = False 3341 elif self._match_text_seq("MATERIALIZED"): 3342 materialized = True 3343 else: 3344 materialized = None 3345 3346 cte = self.expression( 3347 exp.CTE, 3348 this=self._parse_wrapped(self._parse_statement), 3349 alias=alias, 3350 materialized=materialized, 3351 comments=comments, 3352 ) 3353 3354 if isinstance(cte.this, exp.Values): 3355 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3356 3357 return cte 3358 3359 def _parse_table_alias( 3360 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3361 ) -> t.Optional[exp.TableAlias]: 3362 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3363 # so this section tries to parse the clause version and if it fails, it treats the token 3364 # as an identifier (alias) 3365 if self._can_parse_limit_or_offset(): 3366 return None 3367 3368 any_token = self._match(TokenType.ALIAS) 3369 alias = ( 3370 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3371 or self._parse_string_as_identifier() 3372 ) 3373 3374 index = self._index 3375 if self._match(TokenType.L_PAREN): 3376 columns = self._parse_csv(self._parse_function_parameter) 3377 self._match_r_paren() if columns else self._retreat(index) 3378 else: 3379 columns = None 3380 3381 if not alias and not columns: 3382 return None 3383 3384 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3385 3386 # We bubble up comments from the Identifier to the TableAlias 3387 if isinstance(alias, exp.Identifier): 3388 table_alias.add_comments(alias.pop_comments()) 3389 3390 return table_alias 3391 3392 def _parse_subquery( 3393 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3394 ) -> t.Optional[exp.Subquery]: 3395 if not this: 3396 return None 3397 3398 return self.expression( 3399 exp.Subquery, 3400 this=this, 3401 pivots=self._parse_pivots(), 3402 alias=self._parse_table_alias() if parse_alias else None, 3403 sample=self._parse_table_sample(), 3404 ) 3405 3406 def _implicit_unnests_to_explicit(self, this: E) -> E: 3407 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3408 3409 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3410 for i, join in enumerate(this.args.get("joins") or []): 3411 table = join.this 3412 normalized_table = table.copy() 3413 normalized_table.meta["maybe_column"] = True 3414 normalized_table = _norm(normalized_table, dialect=self.dialect) 3415 3416 if isinstance(table, exp.Table) and not join.args.get("on"): 3417 if normalized_table.parts[0].name in refs: 3418 table_as_column = table.to_column() 3419 unnest = exp.Unnest(expressions=[table_as_column]) 3420 3421 # Table.to_column creates a parent Alias node that we want to convert to 3422 # a TableAlias and attach to the Unnest, so it matches the parser's output 3423 if isinstance(table.args.get("alias"), exp.TableAlias): 3424 table_as_column.replace(table_as_column.this) 3425 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3426 3427 table.replace(unnest) 3428 3429 refs.add(normalized_table.alias_or_name) 3430 3431 return this 3432 3433 def _parse_query_modifiers( 3434 self, this: t.Optional[exp.Expression] 3435 ) -> t.Optional[exp.Expression]: 3436 if isinstance(this, self.MODIFIABLES): 3437 for join in self._parse_joins(): 3438 this.append("joins", join) 3439 for lateral in iter(self._parse_lateral, None): 3440 this.append("laterals", lateral) 3441 3442 while True: 3443 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3444 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3445 key, expression = parser(self) 3446 3447 if expression: 3448 this.set(key, expression) 3449 if key == "limit": 3450 offset = expression.args.pop("offset", None) 3451 3452 if offset: 3453 offset = exp.Offset(expression=offset) 3454 this.set("offset", offset) 3455 3456 limit_by_expressions = expression.expressions 3457 expression.set("expressions", None) 3458 offset.set("expressions", limit_by_expressions) 3459 continue 3460 break 3461 3462 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3463 this = self._implicit_unnests_to_explicit(this) 3464 3465 return this 3466 3467 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3468 start = self._curr 3469 while self._curr: 3470 self._advance() 3471 3472 end = self._tokens[self._index - 1] 3473 return exp.Hint(expressions=[self._find_sql(start, end)]) 3474 3475 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3476 return self._parse_function_call() 3477 3478 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3479 start_index = self._index 3480 should_fallback_to_string = False 3481 3482 hints = [] 3483 try: 3484 for hint in iter( 3485 lambda: self._parse_csv( 3486 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3487 ), 3488 [], 3489 ): 3490 hints.extend(hint) 3491 except ParseError: 3492 should_fallback_to_string = True 3493 3494 if should_fallback_to_string or self._curr: 3495 self._retreat(start_index) 3496 return self._parse_hint_fallback_to_string() 3497 3498 return self.expression(exp.Hint, expressions=hints) 3499 3500 def _parse_hint(self) -> t.Optional[exp.Hint]: 3501 if self._match(TokenType.HINT) and self._prev_comments: 3502 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3503 3504 return None 3505 3506 def _parse_into(self) -> t.Optional[exp.Into]: 3507 if not self._match(TokenType.INTO): 3508 return None 3509 3510 temp = self._match(TokenType.TEMPORARY) 3511 unlogged = self._match_text_seq("UNLOGGED") 3512 self._match(TokenType.TABLE) 3513 3514 return self.expression( 3515 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3516 ) 3517 3518 def _parse_from( 3519 self, joins: bool = False, skip_from_token: bool = False 3520 ) -> t.Optional[exp.From]: 3521 if not skip_from_token and not self._match(TokenType.FROM): 3522 return None 3523 3524 return self.expression( 3525 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3526 ) 3527 3528 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3529 return self.expression( 3530 exp.MatchRecognizeMeasure, 3531 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3532 this=self._parse_expression(), 3533 ) 3534 3535 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3536 if not self._match(TokenType.MATCH_RECOGNIZE): 3537 return None 3538 3539 self._match_l_paren() 3540 3541 partition = self._parse_partition_by() 3542 order = self._parse_order() 3543 3544 measures = ( 3545 self._parse_csv(self._parse_match_recognize_measure) 3546 if self._match_text_seq("MEASURES") 3547 else None 3548 ) 3549 3550 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3551 rows = exp.var("ONE ROW PER MATCH") 3552 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3553 text = "ALL ROWS PER MATCH" 3554 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3555 text += " SHOW EMPTY MATCHES" 3556 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3557 text += " OMIT EMPTY MATCHES" 3558 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3559 text += " WITH UNMATCHED ROWS" 3560 rows = exp.var(text) 3561 else: 3562 rows = None 3563 3564 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3565 text = "AFTER MATCH SKIP" 3566 if self._match_text_seq("PAST", "LAST", "ROW"): 3567 text += " PAST LAST ROW" 3568 elif self._match_text_seq("TO", "NEXT", "ROW"): 3569 text += " TO NEXT ROW" 3570 elif self._match_text_seq("TO", "FIRST"): 3571 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3572 elif self._match_text_seq("TO", "LAST"): 3573 text += f" TO LAST {self._advance_any().text}" # type: ignore 3574 after = exp.var(text) 3575 else: 3576 after = None 3577 3578 if self._match_text_seq("PATTERN"): 3579 self._match_l_paren() 3580 3581 if not self._curr: 3582 self.raise_error("Expecting )", self._curr) 3583 3584 paren = 1 3585 start = self._curr 3586 3587 while self._curr and paren > 0: 3588 if self._curr.token_type == TokenType.L_PAREN: 3589 paren += 1 3590 if self._curr.token_type == TokenType.R_PAREN: 3591 paren -= 1 3592 3593 end = self._prev 3594 self._advance() 3595 3596 if paren > 0: 3597 self.raise_error("Expecting )", self._curr) 3598 3599 pattern = exp.var(self._find_sql(start, end)) 3600 else: 3601 pattern = None 3602 3603 define = ( 3604 self._parse_csv(self._parse_name_as_expression) 3605 if self._match_text_seq("DEFINE") 3606 else None 3607 ) 3608 3609 self._match_r_paren() 3610 3611 return self.expression( 3612 exp.MatchRecognize, 3613 partition_by=partition, 3614 order=order, 3615 measures=measures, 3616 rows=rows, 3617 after=after, 3618 pattern=pattern, 3619 define=define, 3620 alias=self._parse_table_alias(), 3621 ) 3622 3623 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3624 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3625 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3626 cross_apply = False 3627 3628 if cross_apply is not None: 3629 this = self._parse_select(table=True) 3630 view = None 3631 outer = None 3632 elif self._match(TokenType.LATERAL): 3633 this = self._parse_select(table=True) 3634 view = self._match(TokenType.VIEW) 3635 outer = self._match(TokenType.OUTER) 3636 else: 3637 return None 3638 3639 if not this: 3640 this = ( 3641 self._parse_unnest() 3642 or self._parse_function() 3643 or self._parse_id_var(any_token=False) 3644 ) 3645 3646 while self._match(TokenType.DOT): 3647 this = exp.Dot( 3648 this=this, 3649 expression=self._parse_function() or self._parse_id_var(any_token=False), 3650 ) 3651 3652 ordinality: t.Optional[bool] = None 3653 3654 if view: 3655 table = self._parse_id_var(any_token=False) 3656 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3657 table_alias: t.Optional[exp.TableAlias] = self.expression( 3658 exp.TableAlias, this=table, columns=columns 3659 ) 3660 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3661 # We move the alias from the lateral's child node to the lateral itself 3662 table_alias = this.args["alias"].pop() 3663 else: 3664 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3665 table_alias = self._parse_table_alias() 3666 3667 return self.expression( 3668 exp.Lateral, 3669 this=this, 3670 view=view, 3671 outer=outer, 3672 alias=table_alias, 3673 cross_apply=cross_apply, 3674 ordinality=ordinality, 3675 ) 3676 3677 def _parse_join_parts( 3678 self, 3679 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3680 return ( 3681 self._match_set(self.JOIN_METHODS) and self._prev, 3682 self._match_set(self.JOIN_SIDES) and self._prev, 3683 self._match_set(self.JOIN_KINDS) and self._prev, 3684 ) 3685 3686 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3687 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3688 this = self._parse_column() 3689 if isinstance(this, exp.Column): 3690 return this.this 3691 return this 3692 3693 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3694 3695 def _parse_join( 3696 self, skip_join_token: bool = False, parse_bracket: bool = False 3697 ) -> t.Optional[exp.Join]: 3698 if self._match(TokenType.COMMA): 3699 table = self._try_parse(self._parse_table) 3700 if table: 3701 return self.expression(exp.Join, this=table) 3702 return None 3703 3704 index = self._index 3705 method, side, kind = self._parse_join_parts() 3706 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3707 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3708 3709 if not skip_join_token and not join: 3710 self._retreat(index) 3711 kind = None 3712 method = None 3713 side = None 3714 3715 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3716 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3717 3718 if not skip_join_token and not join and not outer_apply and not cross_apply: 3719 return None 3720 3721 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3722 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3723 kwargs["expressions"] = self._parse_csv( 3724 lambda: self._parse_table(parse_bracket=parse_bracket) 3725 ) 3726 3727 if method: 3728 kwargs["method"] = method.text 3729 if side: 3730 kwargs["side"] = side.text 3731 if kind: 3732 kwargs["kind"] = kind.text 3733 if hint: 3734 kwargs["hint"] = hint 3735 3736 if self._match(TokenType.MATCH_CONDITION): 3737 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3738 3739 if self._match(TokenType.ON): 3740 kwargs["on"] = self._parse_assignment() 3741 elif self._match(TokenType.USING): 3742 kwargs["using"] = self._parse_using_identifiers() 3743 elif ( 3744 not (outer_apply or cross_apply) 3745 and not isinstance(kwargs["this"], exp.Unnest) 3746 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3747 ): 3748 index = self._index 3749 joins: t.Optional[list] = list(self._parse_joins()) 3750 3751 if joins and self._match(TokenType.ON): 3752 kwargs["on"] = self._parse_assignment() 3753 elif joins and self._match(TokenType.USING): 3754 kwargs["using"] = self._parse_using_identifiers() 3755 else: 3756 joins = None 3757 self._retreat(index) 3758 3759 kwargs["this"].set("joins", joins if joins else None) 3760 3761 kwargs["pivots"] = self._parse_pivots() 3762 3763 comments = [c for token in (method, side, kind) if token for c in token.comments] 3764 return self.expression(exp.Join, comments=comments, **kwargs) 3765 3766 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3767 this = self._parse_assignment() 3768 3769 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3770 return this 3771 3772 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3773 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3774 3775 return this 3776 3777 def _parse_index_params(self) -> exp.IndexParameters: 3778 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3779 3780 if self._match(TokenType.L_PAREN, advance=False): 3781 columns = self._parse_wrapped_csv(self._parse_with_operator) 3782 else: 3783 columns = None 3784 3785 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3786 partition_by = self._parse_partition_by() 3787 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3788 tablespace = ( 3789 self._parse_var(any_token=True) 3790 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3791 else None 3792 ) 3793 where = self._parse_where() 3794 3795 on = self._parse_field() if self._match(TokenType.ON) else None 3796 3797 return self.expression( 3798 exp.IndexParameters, 3799 using=using, 3800 columns=columns, 3801 include=include, 3802 partition_by=partition_by, 3803 where=where, 3804 with_storage=with_storage, 3805 tablespace=tablespace, 3806 on=on, 3807 ) 3808 3809 def _parse_index( 3810 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3811 ) -> t.Optional[exp.Index]: 3812 if index or anonymous: 3813 unique = None 3814 primary = None 3815 amp = None 3816 3817 self._match(TokenType.ON) 3818 self._match(TokenType.TABLE) # hive 3819 table = self._parse_table_parts(schema=True) 3820 else: 3821 unique = self._match(TokenType.UNIQUE) 3822 primary = self._match_text_seq("PRIMARY") 3823 amp = self._match_text_seq("AMP") 3824 3825 if not self._match(TokenType.INDEX): 3826 return None 3827 3828 index = self._parse_id_var() 3829 table = None 3830 3831 params = self._parse_index_params() 3832 3833 return self.expression( 3834 exp.Index, 3835 this=index, 3836 table=table, 3837 unique=unique, 3838 primary=primary, 3839 amp=amp, 3840 params=params, 3841 ) 3842 3843 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3844 hints: t.List[exp.Expression] = [] 3845 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3846 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3847 hints.append( 3848 self.expression( 3849 exp.WithTableHint, 3850 expressions=self._parse_csv( 3851 lambda: self._parse_function() or self._parse_var(any_token=True) 3852 ), 3853 ) 3854 ) 3855 self._match_r_paren() 3856 else: 3857 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3858 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3859 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3860 3861 self._match_set((TokenType.INDEX, TokenType.KEY)) 3862 if self._match(TokenType.FOR): 3863 hint.set("target", self._advance_any() and self._prev.text.upper()) 3864 3865 hint.set("expressions", self._parse_wrapped_id_vars()) 3866 hints.append(hint) 3867 3868 return hints or None 3869 3870 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3871 return ( 3872 (not schema and self._parse_function(optional_parens=False)) 3873 or self._parse_id_var(any_token=False) 3874 or self._parse_string_as_identifier() 3875 or self._parse_placeholder() 3876 ) 3877 3878 def _parse_table_parts( 3879 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3880 ) -> exp.Table: 3881 catalog = None 3882 db = None 3883 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3884 3885 while self._match(TokenType.DOT): 3886 if catalog: 3887 # This allows nesting the table in arbitrarily many dot expressions if needed 3888 table = self.expression( 3889 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3890 ) 3891 else: 3892 catalog = db 3893 db = table 3894 # "" used for tsql FROM a..b case 3895 table = self._parse_table_part(schema=schema) or "" 3896 3897 if ( 3898 wildcard 3899 and self._is_connected() 3900 and (isinstance(table, exp.Identifier) or not table) 3901 and self._match(TokenType.STAR) 3902 ): 3903 if isinstance(table, exp.Identifier): 3904 table.args["this"] += "*" 3905 else: 3906 table = exp.Identifier(this="*") 3907 3908 # We bubble up comments from the Identifier to the Table 3909 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3910 3911 if is_db_reference: 3912 catalog = db 3913 db = table 3914 table = None 3915 3916 if not table and not is_db_reference: 3917 self.raise_error(f"Expected table name but got {self._curr}") 3918 if not db and is_db_reference: 3919 self.raise_error(f"Expected database name but got {self._curr}") 3920 3921 table = self.expression( 3922 exp.Table, 3923 comments=comments, 3924 this=table, 3925 db=db, 3926 catalog=catalog, 3927 ) 3928 3929 changes = self._parse_changes() 3930 if changes: 3931 table.set("changes", changes) 3932 3933 at_before = self._parse_historical_data() 3934 if at_before: 3935 table.set("when", at_before) 3936 3937 pivots = self._parse_pivots() 3938 if pivots: 3939 table.set("pivots", pivots) 3940 3941 return table 3942 3943 def _parse_table( 3944 self, 3945 schema: bool = False, 3946 joins: bool = False, 3947 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3948 parse_bracket: bool = False, 3949 is_db_reference: bool = False, 3950 parse_partition: bool = False, 3951 ) -> t.Optional[exp.Expression]: 3952 lateral = self._parse_lateral() 3953 if lateral: 3954 return lateral 3955 3956 unnest = self._parse_unnest() 3957 if unnest: 3958 return unnest 3959 3960 values = self._parse_derived_table_values() 3961 if values: 3962 return values 3963 3964 subquery = self._parse_select(table=True) 3965 if subquery: 3966 if not subquery.args.get("pivots"): 3967 subquery.set("pivots", self._parse_pivots()) 3968 return subquery 3969 3970 bracket = parse_bracket and self._parse_bracket(None) 3971 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3972 3973 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3974 self._parse_table 3975 ) 3976 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3977 3978 only = self._match(TokenType.ONLY) 3979 3980 this = t.cast( 3981 exp.Expression, 3982 bracket 3983 or rows_from 3984 or self._parse_bracket( 3985 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3986 ), 3987 ) 3988 3989 if only: 3990 this.set("only", only) 3991 3992 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3993 self._match_text_seq("*") 3994 3995 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3996 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3997 this.set("partition", self._parse_partition()) 3998 3999 if schema: 4000 return self._parse_schema(this=this) 4001 4002 version = self._parse_version() 4003 4004 if version: 4005 this.set("version", version) 4006 4007 if self.dialect.ALIAS_POST_TABLESAMPLE: 4008 this.set("sample", self._parse_table_sample()) 4009 4010 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4011 if alias: 4012 this.set("alias", alias) 4013 4014 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4015 return self.expression( 4016 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4017 ) 4018 4019 this.set("hints", self._parse_table_hints()) 4020 4021 if not this.args.get("pivots"): 4022 this.set("pivots", self._parse_pivots()) 4023 4024 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4025 this.set("sample", self._parse_table_sample()) 4026 4027 if joins: 4028 for join in self._parse_joins(): 4029 this.append("joins", join) 4030 4031 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4032 this.set("ordinality", True) 4033 this.set("alias", self._parse_table_alias()) 4034 4035 return this 4036 4037 def _parse_version(self) -> t.Optional[exp.Version]: 4038 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4039 this = "TIMESTAMP" 4040 elif self._match(TokenType.VERSION_SNAPSHOT): 4041 this = "VERSION" 4042 else: 4043 return None 4044 4045 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4046 kind = self._prev.text.upper() 4047 start = self._parse_bitwise() 4048 self._match_texts(("TO", "AND")) 4049 end = self._parse_bitwise() 4050 expression: t.Optional[exp.Expression] = self.expression( 4051 exp.Tuple, expressions=[start, end] 4052 ) 4053 elif self._match_text_seq("CONTAINED", "IN"): 4054 kind = "CONTAINED IN" 4055 expression = self.expression( 4056 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4057 ) 4058 elif self._match(TokenType.ALL): 4059 kind = "ALL" 4060 expression = None 4061 else: 4062 self._match_text_seq("AS", "OF") 4063 kind = "AS OF" 4064 expression = self._parse_type() 4065 4066 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4067 4068 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4069 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4070 index = self._index 4071 historical_data = None 4072 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4073 this = self._prev.text.upper() 4074 kind = ( 4075 self._match(TokenType.L_PAREN) 4076 and self._match_texts(self.HISTORICAL_DATA_KIND) 4077 and self._prev.text.upper() 4078 ) 4079 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4080 4081 if expression: 4082 self._match_r_paren() 4083 historical_data = self.expression( 4084 exp.HistoricalData, this=this, kind=kind, expression=expression 4085 ) 4086 else: 4087 self._retreat(index) 4088 4089 return historical_data 4090 4091 def _parse_changes(self) -> t.Optional[exp.Changes]: 4092 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4093 return None 4094 4095 information = self._parse_var(any_token=True) 4096 self._match_r_paren() 4097 4098 return self.expression( 4099 exp.Changes, 4100 information=information, 4101 at_before=self._parse_historical_data(), 4102 end=self._parse_historical_data(), 4103 ) 4104 4105 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4106 if not self._match(TokenType.UNNEST): 4107 return None 4108 4109 expressions = self._parse_wrapped_csv(self._parse_equality) 4110 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4111 4112 alias = self._parse_table_alias() if with_alias else None 4113 4114 if alias: 4115 if self.dialect.UNNEST_COLUMN_ONLY: 4116 if alias.args.get("columns"): 4117 self.raise_error("Unexpected extra column alias in unnest.") 4118 4119 alias.set("columns", [alias.this]) 4120 alias.set("this", None) 4121 4122 columns = alias.args.get("columns") or [] 4123 if offset and len(expressions) < len(columns): 4124 offset = columns.pop() 4125 4126 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4127 self._match(TokenType.ALIAS) 4128 offset = self._parse_id_var( 4129 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4130 ) or exp.to_identifier("offset") 4131 4132 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4133 4134 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4135 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4136 if not is_derived and not ( 4137 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4138 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4139 ): 4140 return None 4141 4142 expressions = self._parse_csv(self._parse_value) 4143 alias = self._parse_table_alias() 4144 4145 if is_derived: 4146 self._match_r_paren() 4147 4148 return self.expression( 4149 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4150 ) 4151 4152 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4153 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4154 as_modifier and self._match_text_seq("USING", "SAMPLE") 4155 ): 4156 return None 4157 4158 bucket_numerator = None 4159 bucket_denominator = None 4160 bucket_field = None 4161 percent = None 4162 size = None 4163 seed = None 4164 4165 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4166 matched_l_paren = self._match(TokenType.L_PAREN) 4167 4168 if self.TABLESAMPLE_CSV: 4169 num = None 4170 expressions = self._parse_csv(self._parse_primary) 4171 else: 4172 expressions = None 4173 num = ( 4174 self._parse_factor() 4175 if self._match(TokenType.NUMBER, advance=False) 4176 else self._parse_primary() or self._parse_placeholder() 4177 ) 4178 4179 if self._match_text_seq("BUCKET"): 4180 bucket_numerator = self._parse_number() 4181 self._match_text_seq("OUT", "OF") 4182 bucket_denominator = bucket_denominator = self._parse_number() 4183 self._match(TokenType.ON) 4184 bucket_field = self._parse_field() 4185 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4186 percent = num 4187 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4188 size = num 4189 else: 4190 percent = num 4191 4192 if matched_l_paren: 4193 self._match_r_paren() 4194 4195 if self._match(TokenType.L_PAREN): 4196 method = self._parse_var(upper=True) 4197 seed = self._match(TokenType.COMMA) and self._parse_number() 4198 self._match_r_paren() 4199 elif self._match_texts(("SEED", "REPEATABLE")): 4200 seed = self._parse_wrapped(self._parse_number) 4201 4202 if not method and self.DEFAULT_SAMPLING_METHOD: 4203 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4204 4205 return self.expression( 4206 exp.TableSample, 4207 expressions=expressions, 4208 method=method, 4209 bucket_numerator=bucket_numerator, 4210 bucket_denominator=bucket_denominator, 4211 bucket_field=bucket_field, 4212 percent=percent, 4213 size=size, 4214 seed=seed, 4215 ) 4216 4217 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4218 return list(iter(self._parse_pivot, None)) or None 4219 4220 def _parse_joins(self) -> t.Iterator[exp.Join]: 4221 return iter(self._parse_join, None) 4222 4223 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4224 if not self._match(TokenType.INTO): 4225 return None 4226 4227 return self.expression( 4228 exp.UnpivotColumns, 4229 this=self._match_text_seq("NAME") and self._parse_column(), 4230 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4231 ) 4232 4233 # https://duckdb.org/docs/sql/statements/pivot 4234 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4235 def _parse_on() -> t.Optional[exp.Expression]: 4236 this = self._parse_bitwise() 4237 4238 if self._match(TokenType.IN): 4239 # PIVOT ... ON col IN (row_val1, row_val2) 4240 return self._parse_in(this) 4241 if self._match(TokenType.ALIAS, advance=False): 4242 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4243 return self._parse_alias(this) 4244 4245 return this 4246 4247 this = self._parse_table() 4248 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4249 into = self._parse_unpivot_columns() 4250 using = self._match(TokenType.USING) and self._parse_csv( 4251 lambda: self._parse_alias(self._parse_function()) 4252 ) 4253 group = self._parse_group() 4254 4255 return self.expression( 4256 exp.Pivot, 4257 this=this, 4258 expressions=expressions, 4259 using=using, 4260 group=group, 4261 unpivot=is_unpivot, 4262 into=into, 4263 ) 4264 4265 def _parse_pivot_in(self) -> exp.In: 4266 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4267 this = self._parse_select_or_expression() 4268 4269 self._match(TokenType.ALIAS) 4270 alias = self._parse_bitwise() 4271 if alias: 4272 if isinstance(alias, exp.Column) and not alias.db: 4273 alias = alias.this 4274 return self.expression(exp.PivotAlias, this=this, alias=alias) 4275 4276 return this 4277 4278 value = self._parse_column() 4279 4280 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4281 self.raise_error("Expecting IN (") 4282 4283 if self._match(TokenType.ANY): 4284 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4285 else: 4286 exprs = self._parse_csv(_parse_aliased_expression) 4287 4288 self._match_r_paren() 4289 return self.expression(exp.In, this=value, expressions=exprs) 4290 4291 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4292 index = self._index 4293 include_nulls = None 4294 4295 if self._match(TokenType.PIVOT): 4296 unpivot = False 4297 elif self._match(TokenType.UNPIVOT): 4298 unpivot = True 4299 4300 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4301 if self._match_text_seq("INCLUDE", "NULLS"): 4302 include_nulls = True 4303 elif self._match_text_seq("EXCLUDE", "NULLS"): 4304 include_nulls = False 4305 else: 4306 return None 4307 4308 expressions = [] 4309 4310 if not self._match(TokenType.L_PAREN): 4311 self._retreat(index) 4312 return None 4313 4314 if unpivot: 4315 expressions = self._parse_csv(self._parse_column) 4316 else: 4317 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4318 4319 if not expressions: 4320 self.raise_error("Failed to parse PIVOT's aggregation list") 4321 4322 if not self._match(TokenType.FOR): 4323 self.raise_error("Expecting FOR") 4324 4325 fields = [] 4326 while True: 4327 field = self._try_parse(self._parse_pivot_in) 4328 if not field: 4329 break 4330 fields.append(field) 4331 4332 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4333 self._parse_bitwise 4334 ) 4335 4336 group = self._parse_group() 4337 4338 self._match_r_paren() 4339 4340 pivot = self.expression( 4341 exp.Pivot, 4342 expressions=expressions, 4343 fields=fields, 4344 unpivot=unpivot, 4345 include_nulls=include_nulls, 4346 default_on_null=default_on_null, 4347 group=group, 4348 ) 4349 4350 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4351 pivot.set("alias", self._parse_table_alias()) 4352 4353 if not unpivot: 4354 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4355 4356 columns: t.List[exp.Expression] = [] 4357 all_fields = [] 4358 for pivot_field in pivot.fields: 4359 pivot_field_expressions = pivot_field.expressions 4360 4361 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4362 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4363 continue 4364 4365 all_fields.append( 4366 [ 4367 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4368 for fld in pivot_field_expressions 4369 ] 4370 ) 4371 4372 if all_fields: 4373 if names: 4374 all_fields.append(names) 4375 4376 # Generate all possible combinations of the pivot columns 4377 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4378 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4379 for fld_parts_tuple in itertools.product(*all_fields): 4380 fld_parts = list(fld_parts_tuple) 4381 4382 if names and self.PREFIXED_PIVOT_COLUMNS: 4383 # Move the "name" to the front of the list 4384 fld_parts.insert(0, fld_parts.pop(-1)) 4385 4386 columns.append(exp.to_identifier("_".join(fld_parts))) 4387 4388 pivot.set("columns", columns) 4389 4390 return pivot 4391 4392 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4393 return [agg.alias for agg in aggregations if agg.alias] 4394 4395 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4396 if not skip_where_token and not self._match(TokenType.PREWHERE): 4397 return None 4398 4399 return self.expression( 4400 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4401 ) 4402 4403 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4404 if not skip_where_token and not self._match(TokenType.WHERE): 4405 return None 4406 4407 return self.expression( 4408 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4409 ) 4410 4411 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4412 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4413 return None 4414 4415 elements: t.Dict[str, t.Any] = defaultdict(list) 4416 4417 if self._match(TokenType.ALL): 4418 elements["all"] = True 4419 elif self._match(TokenType.DISTINCT): 4420 elements["all"] = False 4421 4422 while True: 4423 index = self._index 4424 4425 elements["expressions"].extend( 4426 self._parse_csv( 4427 lambda: None 4428 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4429 else self._parse_assignment() 4430 ) 4431 ) 4432 4433 before_with_index = self._index 4434 with_prefix = self._match(TokenType.WITH) 4435 4436 if self._match(TokenType.ROLLUP): 4437 elements["rollup"].append( 4438 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4439 ) 4440 elif self._match(TokenType.CUBE): 4441 elements["cube"].append( 4442 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4443 ) 4444 elif self._match(TokenType.GROUPING_SETS): 4445 elements["grouping_sets"].append( 4446 self.expression( 4447 exp.GroupingSets, 4448 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4449 ) 4450 ) 4451 elif self._match_text_seq("TOTALS"): 4452 elements["totals"] = True # type: ignore 4453 4454 if before_with_index <= self._index <= before_with_index + 1: 4455 self._retreat(before_with_index) 4456 break 4457 4458 if index == self._index: 4459 break 4460 4461 return self.expression(exp.Group, **elements) # type: ignore 4462 4463 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4464 return self.expression( 4465 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4466 ) 4467 4468 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4469 if self._match(TokenType.L_PAREN): 4470 grouping_set = self._parse_csv(self._parse_column) 4471 self._match_r_paren() 4472 return self.expression(exp.Tuple, expressions=grouping_set) 4473 4474 return self._parse_column() 4475 4476 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4477 if not skip_having_token and not self._match(TokenType.HAVING): 4478 return None 4479 return self.expression(exp.Having, this=self._parse_assignment()) 4480 4481 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4482 if not self._match(TokenType.QUALIFY): 4483 return None 4484 return self.expression(exp.Qualify, this=self._parse_assignment()) 4485 4486 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4487 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4488 exp.Prior, this=self._parse_bitwise() 4489 ) 4490 connect = self._parse_assignment() 4491 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4492 return connect 4493 4494 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4495 if skip_start_token: 4496 start = None 4497 elif self._match(TokenType.START_WITH): 4498 start = self._parse_assignment() 4499 else: 4500 return None 4501 4502 self._match(TokenType.CONNECT_BY) 4503 nocycle = self._match_text_seq("NOCYCLE") 4504 connect = self._parse_connect_with_prior() 4505 4506 if not start and self._match(TokenType.START_WITH): 4507 start = self._parse_assignment() 4508 4509 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4510 4511 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4512 this = self._parse_id_var(any_token=True) 4513 if self._match(TokenType.ALIAS): 4514 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4515 return this 4516 4517 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4518 if self._match_text_seq("INTERPOLATE"): 4519 return self._parse_wrapped_csv(self._parse_name_as_expression) 4520 return None 4521 4522 def _parse_order( 4523 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4524 ) -> t.Optional[exp.Expression]: 4525 siblings = None 4526 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4527 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4528 return this 4529 4530 siblings = True 4531 4532 return self.expression( 4533 exp.Order, 4534 this=this, 4535 expressions=self._parse_csv(self._parse_ordered), 4536 siblings=siblings, 4537 ) 4538 4539 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4540 if not self._match(token): 4541 return None 4542 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4543 4544 def _parse_ordered( 4545 self, parse_method: t.Optional[t.Callable] = None 4546 ) -> t.Optional[exp.Ordered]: 4547 this = parse_method() if parse_method else self._parse_assignment() 4548 if not this: 4549 return None 4550 4551 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4552 this = exp.var("ALL") 4553 4554 asc = self._match(TokenType.ASC) 4555 desc = self._match(TokenType.DESC) or (asc and False) 4556 4557 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4558 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4559 4560 nulls_first = is_nulls_first or False 4561 explicitly_null_ordered = is_nulls_first or is_nulls_last 4562 4563 if ( 4564 not explicitly_null_ordered 4565 and ( 4566 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4567 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4568 ) 4569 and self.dialect.NULL_ORDERING != "nulls_are_last" 4570 ): 4571 nulls_first = True 4572 4573 if self._match_text_seq("WITH", "FILL"): 4574 with_fill = self.expression( 4575 exp.WithFill, 4576 **{ # type: ignore 4577 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4578 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4579 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4580 "interpolate": self._parse_interpolate(), 4581 }, 4582 ) 4583 else: 4584 with_fill = None 4585 4586 return self.expression( 4587 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4588 ) 4589 4590 def _parse_limit_options(self) -> exp.LimitOptions: 4591 percent = self._match(TokenType.PERCENT) 4592 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4593 self._match_text_seq("ONLY") 4594 with_ties = self._match_text_seq("WITH", "TIES") 4595 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4596 4597 def _parse_limit( 4598 self, 4599 this: t.Optional[exp.Expression] = None, 4600 top: bool = False, 4601 skip_limit_token: bool = False, 4602 ) -> t.Optional[exp.Expression]: 4603 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4604 comments = self._prev_comments 4605 if top: 4606 limit_paren = self._match(TokenType.L_PAREN) 4607 expression = self._parse_term() if limit_paren else self._parse_number() 4608 4609 if limit_paren: 4610 self._match_r_paren() 4611 4612 limit_options = self._parse_limit_options() 4613 else: 4614 limit_options = None 4615 expression = self._parse_term() 4616 4617 if self._match(TokenType.COMMA): 4618 offset = expression 4619 expression = self._parse_term() 4620 else: 4621 offset = None 4622 4623 limit_exp = self.expression( 4624 exp.Limit, 4625 this=this, 4626 expression=expression, 4627 offset=offset, 4628 comments=comments, 4629 limit_options=limit_options, 4630 expressions=self._parse_limit_by(), 4631 ) 4632 4633 return limit_exp 4634 4635 if self._match(TokenType.FETCH): 4636 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4637 direction = self._prev.text.upper() if direction else "FIRST" 4638 4639 count = self._parse_field(tokens=self.FETCH_TOKENS) 4640 4641 return self.expression( 4642 exp.Fetch, 4643 direction=direction, 4644 count=count, 4645 limit_options=self._parse_limit_options(), 4646 ) 4647 4648 return this 4649 4650 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4651 if not self._match(TokenType.OFFSET): 4652 return this 4653 4654 count = self._parse_term() 4655 self._match_set((TokenType.ROW, TokenType.ROWS)) 4656 4657 return self.expression( 4658 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4659 ) 4660 4661 def _can_parse_limit_or_offset(self) -> bool: 4662 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4663 return False 4664 4665 index = self._index 4666 result = bool( 4667 self._try_parse(self._parse_limit, retreat=True) 4668 or self._try_parse(self._parse_offset, retreat=True) 4669 ) 4670 self._retreat(index) 4671 return result 4672 4673 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4674 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4675 4676 def _parse_locks(self) -> t.List[exp.Lock]: 4677 locks = [] 4678 while True: 4679 if self._match_text_seq("FOR", "UPDATE"): 4680 update = True 4681 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4682 "LOCK", "IN", "SHARE", "MODE" 4683 ): 4684 update = False 4685 else: 4686 break 4687 4688 expressions = None 4689 if self._match_text_seq("OF"): 4690 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4691 4692 wait: t.Optional[bool | exp.Expression] = None 4693 if self._match_text_seq("NOWAIT"): 4694 wait = True 4695 elif self._match_text_seq("WAIT"): 4696 wait = self._parse_primary() 4697 elif self._match_text_seq("SKIP", "LOCKED"): 4698 wait = False 4699 4700 locks.append( 4701 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4702 ) 4703 4704 return locks 4705 4706 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4707 start = self._index 4708 _, side_token, kind_token = self._parse_join_parts() 4709 4710 side = side_token.text if side_token else None 4711 kind = kind_token.text if kind_token else None 4712 4713 if not self._match_set(self.SET_OPERATIONS): 4714 self._retreat(start) 4715 return None 4716 4717 token_type = self._prev.token_type 4718 4719 if token_type == TokenType.UNION: 4720 operation: t.Type[exp.SetOperation] = exp.Union 4721 elif token_type == TokenType.EXCEPT: 4722 operation = exp.Except 4723 else: 4724 operation = exp.Intersect 4725 4726 comments = self._prev.comments 4727 4728 if self._match(TokenType.DISTINCT): 4729 distinct: t.Optional[bool] = True 4730 elif self._match(TokenType.ALL): 4731 distinct = False 4732 else: 4733 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4734 if distinct is None: 4735 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4736 4737 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4738 "STRICT", "CORRESPONDING" 4739 ) 4740 if self._match_text_seq("CORRESPONDING"): 4741 by_name = True 4742 if not side and not kind: 4743 kind = "INNER" 4744 4745 on_column_list = None 4746 if by_name and self._match_texts(("ON", "BY")): 4747 on_column_list = self._parse_wrapped_csv(self._parse_column) 4748 4749 expression = self._parse_select(nested=True, parse_set_operation=False) 4750 4751 return self.expression( 4752 operation, 4753 comments=comments, 4754 this=this, 4755 distinct=distinct, 4756 by_name=by_name, 4757 expression=expression, 4758 side=side, 4759 kind=kind, 4760 on=on_column_list, 4761 ) 4762 4763 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4764 while this: 4765 setop = self.parse_set_operation(this) 4766 if not setop: 4767 break 4768 this = setop 4769 4770 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4771 expression = this.expression 4772 4773 if expression: 4774 for arg in self.SET_OP_MODIFIERS: 4775 expr = expression.args.get(arg) 4776 if expr: 4777 this.set(arg, expr.pop()) 4778 4779 return this 4780 4781 def _parse_expression(self) -> t.Optional[exp.Expression]: 4782 return self._parse_alias(self._parse_assignment()) 4783 4784 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4785 this = self._parse_disjunction() 4786 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4787 # This allows us to parse <non-identifier token> := <expr> 4788 this = exp.column( 4789 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4790 ) 4791 4792 while self._match_set(self.ASSIGNMENT): 4793 if isinstance(this, exp.Column) and len(this.parts) == 1: 4794 this = this.this 4795 4796 this = self.expression( 4797 self.ASSIGNMENT[self._prev.token_type], 4798 this=this, 4799 comments=self._prev_comments, 4800 expression=self._parse_assignment(), 4801 ) 4802 4803 return this 4804 4805 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4806 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4807 4808 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4809 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4810 4811 def _parse_equality(self) -> t.Optional[exp.Expression]: 4812 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4813 4814 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4815 return self._parse_tokens(self._parse_range, self.COMPARISON) 4816 4817 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4818 this = this or self._parse_bitwise() 4819 negate = self._match(TokenType.NOT) 4820 4821 if self._match_set(self.RANGE_PARSERS): 4822 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4823 if not expression: 4824 return this 4825 4826 this = expression 4827 elif self._match(TokenType.ISNULL): 4828 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4829 4830 # Postgres supports ISNULL and NOTNULL for conditions. 4831 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4832 if self._match(TokenType.NOTNULL): 4833 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4834 this = self.expression(exp.Not, this=this) 4835 4836 if negate: 4837 this = self._negate_range(this) 4838 4839 if self._match(TokenType.IS): 4840 this = self._parse_is(this) 4841 4842 return this 4843 4844 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4845 if not this: 4846 return this 4847 4848 return self.expression(exp.Not, this=this) 4849 4850 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4851 index = self._index - 1 4852 negate = self._match(TokenType.NOT) 4853 4854 if self._match_text_seq("DISTINCT", "FROM"): 4855 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4856 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4857 4858 if self._match(TokenType.JSON): 4859 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4860 4861 if self._match_text_seq("WITH"): 4862 _with = True 4863 elif self._match_text_seq("WITHOUT"): 4864 _with = False 4865 else: 4866 _with = None 4867 4868 unique = self._match(TokenType.UNIQUE) 4869 self._match_text_seq("KEYS") 4870 expression: t.Optional[exp.Expression] = self.expression( 4871 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4872 ) 4873 else: 4874 expression = self._parse_primary() or self._parse_null() 4875 if not expression: 4876 self._retreat(index) 4877 return None 4878 4879 this = self.expression(exp.Is, this=this, expression=expression) 4880 return self.expression(exp.Not, this=this) if negate else this 4881 4882 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4883 unnest = self._parse_unnest(with_alias=False) 4884 if unnest: 4885 this = self.expression(exp.In, this=this, unnest=unnest) 4886 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4887 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4888 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4889 4890 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4891 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4892 else: 4893 this = self.expression(exp.In, this=this, expressions=expressions) 4894 4895 if matched_l_paren: 4896 self._match_r_paren(this) 4897 elif not self._match(TokenType.R_BRACKET, expression=this): 4898 self.raise_error("Expecting ]") 4899 else: 4900 this = self.expression(exp.In, this=this, field=self._parse_column()) 4901 4902 return this 4903 4904 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4905 low = self._parse_bitwise() 4906 self._match(TokenType.AND) 4907 high = self._parse_bitwise() 4908 return self.expression(exp.Between, this=this, low=low, high=high) 4909 4910 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4911 if not self._match(TokenType.ESCAPE): 4912 return this 4913 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4914 4915 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4916 index = self._index 4917 4918 if not self._match(TokenType.INTERVAL) and match_interval: 4919 return None 4920 4921 if self._match(TokenType.STRING, advance=False): 4922 this = self._parse_primary() 4923 else: 4924 this = self._parse_term() 4925 4926 if not this or ( 4927 isinstance(this, exp.Column) 4928 and not this.table 4929 and not this.this.quoted 4930 and this.name.upper() == "IS" 4931 ): 4932 self._retreat(index) 4933 return None 4934 4935 unit = self._parse_function() or ( 4936 not self._match(TokenType.ALIAS, advance=False) 4937 and self._parse_var(any_token=True, upper=True) 4938 ) 4939 4940 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4941 # each INTERVAL expression into this canonical form so it's easy to transpile 4942 if this and this.is_number: 4943 this = exp.Literal.string(this.to_py()) 4944 elif this and this.is_string: 4945 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4946 if parts and unit: 4947 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4948 unit = None 4949 self._retreat(self._index - 1) 4950 4951 if len(parts) == 1: 4952 this = exp.Literal.string(parts[0][0]) 4953 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4954 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4955 unit = self.expression( 4956 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4957 ) 4958 4959 interval = self.expression(exp.Interval, this=this, unit=unit) 4960 4961 index = self._index 4962 self._match(TokenType.PLUS) 4963 4964 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4965 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4966 return self.expression( 4967 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4968 ) 4969 4970 self._retreat(index) 4971 return interval 4972 4973 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4974 this = self._parse_term() 4975 4976 while True: 4977 if self._match_set(self.BITWISE): 4978 this = self.expression( 4979 self.BITWISE[self._prev.token_type], 4980 this=this, 4981 expression=self._parse_term(), 4982 ) 4983 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4984 this = self.expression( 4985 exp.DPipe, 4986 this=this, 4987 expression=self._parse_term(), 4988 safe=not self.dialect.STRICT_STRING_CONCAT, 4989 ) 4990 elif self._match(TokenType.DQMARK): 4991 this = self.expression( 4992 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4993 ) 4994 elif self._match_pair(TokenType.LT, TokenType.LT): 4995 this = self.expression( 4996 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4997 ) 4998 elif self._match_pair(TokenType.GT, TokenType.GT): 4999 this = self.expression( 5000 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5001 ) 5002 else: 5003 break 5004 5005 return this 5006 5007 def _parse_term(self) -> t.Optional[exp.Expression]: 5008 this = self._parse_factor() 5009 5010 while self._match_set(self.TERM): 5011 klass = self.TERM[self._prev.token_type] 5012 comments = self._prev_comments 5013 expression = self._parse_factor() 5014 5015 this = self.expression(klass, this=this, comments=comments, expression=expression) 5016 5017 if isinstance(this, exp.Collate): 5018 expr = this.expression 5019 5020 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5021 # fallback to Identifier / Var 5022 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5023 ident = expr.this 5024 if isinstance(ident, exp.Identifier): 5025 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5026 5027 return this 5028 5029 def _parse_factor(self) -> t.Optional[exp.Expression]: 5030 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5031 this = parse_method() 5032 5033 while self._match_set(self.FACTOR): 5034 klass = self.FACTOR[self._prev.token_type] 5035 comments = self._prev_comments 5036 expression = parse_method() 5037 5038 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5039 self._retreat(self._index - 1) 5040 return this 5041 5042 this = self.expression(klass, this=this, comments=comments, expression=expression) 5043 5044 if isinstance(this, exp.Div): 5045 this.args["typed"] = self.dialect.TYPED_DIVISION 5046 this.args["safe"] = self.dialect.SAFE_DIVISION 5047 5048 return this 5049 5050 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5051 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5052 5053 def _parse_unary(self) -> t.Optional[exp.Expression]: 5054 if self._match_set(self.UNARY_PARSERS): 5055 return self.UNARY_PARSERS[self._prev.token_type](self) 5056 return self._parse_at_time_zone(self._parse_type()) 5057 5058 def _parse_type( 5059 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5060 ) -> t.Optional[exp.Expression]: 5061 interval = parse_interval and self._parse_interval() 5062 if interval: 5063 return interval 5064 5065 index = self._index 5066 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5067 5068 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5069 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5070 if isinstance(data_type, exp.Cast): 5071 # This constructor can contain ops directly after it, for instance struct unnesting: 5072 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5073 return self._parse_column_ops(data_type) 5074 5075 if data_type: 5076 index2 = self._index 5077 this = self._parse_primary() 5078 5079 if isinstance(this, exp.Literal): 5080 this = self._parse_column_ops(this) 5081 5082 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5083 if parser: 5084 return parser(self, this, data_type) 5085 5086 return self.expression(exp.Cast, this=this, to=data_type) 5087 5088 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5089 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5090 # 5091 # If the index difference here is greater than 1, that means the parser itself must have 5092 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5093 # 5094 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5095 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5096 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5097 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5098 # 5099 # In these cases, we don't really want to return the converted type, but instead retreat 5100 # and try to parse a Column or Identifier in the section below. 5101 if data_type.expressions and index2 - index > 1: 5102 self._retreat(index2) 5103 return self._parse_column_ops(data_type) 5104 5105 self._retreat(index) 5106 5107 if fallback_to_identifier: 5108 return self._parse_id_var() 5109 5110 this = self._parse_column() 5111 return this and self._parse_column_ops(this) 5112 5113 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5114 this = self._parse_type() 5115 if not this: 5116 return None 5117 5118 if isinstance(this, exp.Column) and not this.table: 5119 this = exp.var(this.name.upper()) 5120 5121 return self.expression( 5122 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5123 ) 5124 5125 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5126 type_name = identifier.name 5127 5128 while self._match(TokenType.DOT): 5129 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5130 5131 return exp.DataType.build(type_name, udt=True) 5132 5133 def _parse_types( 5134 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5135 ) -> t.Optional[exp.Expression]: 5136 index = self._index 5137 5138 this: t.Optional[exp.Expression] = None 5139 prefix = self._match_text_seq("SYSUDTLIB", ".") 5140 5141 if not self._match_set(self.TYPE_TOKENS): 5142 identifier = allow_identifiers and self._parse_id_var( 5143 any_token=False, tokens=(TokenType.VAR,) 5144 ) 5145 if isinstance(identifier, exp.Identifier): 5146 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5147 5148 if len(tokens) != 1: 5149 self.raise_error("Unexpected identifier", self._prev) 5150 5151 if tokens[0].token_type in self.TYPE_TOKENS: 5152 self._prev = tokens[0] 5153 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5154 this = self._parse_user_defined_type(identifier) 5155 else: 5156 self._retreat(self._index - 1) 5157 return None 5158 else: 5159 return None 5160 5161 type_token = self._prev.token_type 5162 5163 if type_token == TokenType.PSEUDO_TYPE: 5164 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5165 5166 if type_token == TokenType.OBJECT_IDENTIFIER: 5167 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5168 5169 # https://materialize.com/docs/sql/types/map/ 5170 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5171 key_type = self._parse_types( 5172 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5173 ) 5174 if not self._match(TokenType.FARROW): 5175 self._retreat(index) 5176 return None 5177 5178 value_type = self._parse_types( 5179 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5180 ) 5181 if not self._match(TokenType.R_BRACKET): 5182 self._retreat(index) 5183 return None 5184 5185 return exp.DataType( 5186 this=exp.DataType.Type.MAP, 5187 expressions=[key_type, value_type], 5188 nested=True, 5189 prefix=prefix, 5190 ) 5191 5192 nested = type_token in self.NESTED_TYPE_TOKENS 5193 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5194 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5195 expressions = None 5196 maybe_func = False 5197 5198 if self._match(TokenType.L_PAREN): 5199 if is_struct: 5200 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5201 elif nested: 5202 expressions = self._parse_csv( 5203 lambda: self._parse_types( 5204 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5205 ) 5206 ) 5207 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5208 this = expressions[0] 5209 this.set("nullable", True) 5210 self._match_r_paren() 5211 return this 5212 elif type_token in self.ENUM_TYPE_TOKENS: 5213 expressions = self._parse_csv(self._parse_equality) 5214 elif is_aggregate: 5215 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5216 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5217 ) 5218 if not func_or_ident: 5219 return None 5220 expressions = [func_or_ident] 5221 if self._match(TokenType.COMMA): 5222 expressions.extend( 5223 self._parse_csv( 5224 lambda: self._parse_types( 5225 check_func=check_func, 5226 schema=schema, 5227 allow_identifiers=allow_identifiers, 5228 ) 5229 ) 5230 ) 5231 else: 5232 expressions = self._parse_csv(self._parse_type_size) 5233 5234 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5235 if type_token == TokenType.VECTOR and len(expressions) == 2: 5236 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5237 5238 if not expressions or not self._match(TokenType.R_PAREN): 5239 self._retreat(index) 5240 return None 5241 5242 maybe_func = True 5243 5244 values: t.Optional[t.List[exp.Expression]] = None 5245 5246 if nested and self._match(TokenType.LT): 5247 if is_struct: 5248 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5249 else: 5250 expressions = self._parse_csv( 5251 lambda: self._parse_types( 5252 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5253 ) 5254 ) 5255 5256 if not self._match(TokenType.GT): 5257 self.raise_error("Expecting >") 5258 5259 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5260 values = self._parse_csv(self._parse_assignment) 5261 if not values and is_struct: 5262 values = None 5263 self._retreat(self._index - 1) 5264 else: 5265 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5266 5267 if type_token in self.TIMESTAMPS: 5268 if self._match_text_seq("WITH", "TIME", "ZONE"): 5269 maybe_func = False 5270 tz_type = ( 5271 exp.DataType.Type.TIMETZ 5272 if type_token in self.TIMES 5273 else exp.DataType.Type.TIMESTAMPTZ 5274 ) 5275 this = exp.DataType(this=tz_type, expressions=expressions) 5276 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5277 maybe_func = False 5278 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5279 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5280 maybe_func = False 5281 elif type_token == TokenType.INTERVAL: 5282 unit = self._parse_var(upper=True) 5283 if unit: 5284 if self._match_text_seq("TO"): 5285 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5286 5287 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5288 else: 5289 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5290 elif type_token == TokenType.VOID: 5291 this = exp.DataType(this=exp.DataType.Type.NULL) 5292 5293 if maybe_func and check_func: 5294 index2 = self._index 5295 peek = self._parse_string() 5296 5297 if not peek: 5298 self._retreat(index) 5299 return None 5300 5301 self._retreat(index2) 5302 5303 if not this: 5304 if self._match_text_seq("UNSIGNED"): 5305 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5306 if not unsigned_type_token: 5307 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5308 5309 type_token = unsigned_type_token or type_token 5310 5311 this = exp.DataType( 5312 this=exp.DataType.Type[type_token.value], 5313 expressions=expressions, 5314 nested=nested, 5315 prefix=prefix, 5316 ) 5317 5318 # Empty arrays/structs are allowed 5319 if values is not None: 5320 cls = exp.Struct if is_struct else exp.Array 5321 this = exp.cast(cls(expressions=values), this, copy=False) 5322 5323 elif expressions: 5324 this.set("expressions", expressions) 5325 5326 # https://materialize.com/docs/sql/types/list/#type-name 5327 while self._match(TokenType.LIST): 5328 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5329 5330 index = self._index 5331 5332 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5333 matched_array = self._match(TokenType.ARRAY) 5334 5335 while self._curr: 5336 datatype_token = self._prev.token_type 5337 matched_l_bracket = self._match(TokenType.L_BRACKET) 5338 5339 if (not matched_l_bracket and not matched_array) or ( 5340 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5341 ): 5342 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5343 # not to be confused with the fixed size array parsing 5344 break 5345 5346 matched_array = False 5347 values = self._parse_csv(self._parse_assignment) or None 5348 if ( 5349 values 5350 and not schema 5351 and ( 5352 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5353 ) 5354 ): 5355 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5356 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5357 self._retreat(index) 5358 break 5359 5360 this = exp.DataType( 5361 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5362 ) 5363 self._match(TokenType.R_BRACKET) 5364 5365 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5366 converter = self.TYPE_CONVERTERS.get(this.this) 5367 if converter: 5368 this = converter(t.cast(exp.DataType, this)) 5369 5370 return this 5371 5372 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5373 index = self._index 5374 5375 if ( 5376 self._curr 5377 and self._next 5378 and self._curr.token_type in self.TYPE_TOKENS 5379 and self._next.token_type in self.TYPE_TOKENS 5380 ): 5381 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5382 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5383 this = self._parse_id_var() 5384 else: 5385 this = ( 5386 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5387 or self._parse_id_var() 5388 ) 5389 5390 self._match(TokenType.COLON) 5391 5392 if ( 5393 type_required 5394 and not isinstance(this, exp.DataType) 5395 and not self._match_set(self.TYPE_TOKENS, advance=False) 5396 ): 5397 self._retreat(index) 5398 return self._parse_types() 5399 5400 return self._parse_column_def(this) 5401 5402 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5403 if not self._match_text_seq("AT", "TIME", "ZONE"): 5404 return this 5405 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5406 5407 def _parse_column(self) -> t.Optional[exp.Expression]: 5408 this = self._parse_column_reference() 5409 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5410 5411 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5412 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5413 5414 return column 5415 5416 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5417 this = self._parse_field() 5418 if ( 5419 not this 5420 and self._match(TokenType.VALUES, advance=False) 5421 and self.VALUES_FOLLOWED_BY_PAREN 5422 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5423 ): 5424 this = self._parse_id_var() 5425 5426 if isinstance(this, exp.Identifier): 5427 # We bubble up comments from the Identifier to the Column 5428 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5429 5430 return this 5431 5432 def _parse_colon_as_variant_extract( 5433 self, this: t.Optional[exp.Expression] 5434 ) -> t.Optional[exp.Expression]: 5435 casts = [] 5436 json_path = [] 5437 escape = None 5438 5439 while self._match(TokenType.COLON): 5440 start_index = self._index 5441 5442 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5443 path = self._parse_column_ops( 5444 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5445 ) 5446 5447 # The cast :: operator has a lower precedence than the extraction operator :, so 5448 # we rearrange the AST appropriately to avoid casting the JSON path 5449 while isinstance(path, exp.Cast): 5450 casts.append(path.to) 5451 path = path.this 5452 5453 if casts: 5454 dcolon_offset = next( 5455 i 5456 for i, t in enumerate(self._tokens[start_index:]) 5457 if t.token_type == TokenType.DCOLON 5458 ) 5459 end_token = self._tokens[start_index + dcolon_offset - 1] 5460 else: 5461 end_token = self._prev 5462 5463 if path: 5464 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5465 # it'll roundtrip to a string literal in GET_PATH 5466 if isinstance(path, exp.Identifier) and path.quoted: 5467 escape = True 5468 5469 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5470 5471 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5472 # Databricks transforms it back to the colon/dot notation 5473 if json_path: 5474 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5475 5476 if json_path_expr: 5477 json_path_expr.set("escape", escape) 5478 5479 this = self.expression( 5480 exp.JSONExtract, 5481 this=this, 5482 expression=json_path_expr, 5483 variant_extract=True, 5484 ) 5485 5486 while casts: 5487 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5488 5489 return this 5490 5491 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5492 return self._parse_types() 5493 5494 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5495 this = self._parse_bracket(this) 5496 5497 while self._match_set(self.COLUMN_OPERATORS): 5498 op_token = self._prev.token_type 5499 op = self.COLUMN_OPERATORS.get(op_token) 5500 5501 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5502 field = self._parse_dcolon() 5503 if not field: 5504 self.raise_error("Expected type") 5505 elif op and self._curr: 5506 field = self._parse_column_reference() or self._parse_bracket() 5507 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5508 field = self._parse_column_ops(field) 5509 else: 5510 field = self._parse_field(any_token=True, anonymous_func=True) 5511 5512 # Function calls can be qualified, e.g., x.y.FOO() 5513 # This converts the final AST to a series of Dots leading to the function call 5514 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5515 if isinstance(field, (exp.Func, exp.Window)) and this: 5516 this = this.transform( 5517 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5518 ) 5519 5520 if op: 5521 this = op(self, this, field) 5522 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5523 this = self.expression( 5524 exp.Column, 5525 comments=this.comments, 5526 this=field, 5527 table=this.this, 5528 db=this.args.get("table"), 5529 catalog=this.args.get("db"), 5530 ) 5531 elif isinstance(field, exp.Window): 5532 # Move the exp.Dot's to the window's function 5533 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5534 field.set("this", window_func) 5535 this = field 5536 else: 5537 this = self.expression(exp.Dot, this=this, expression=field) 5538 5539 if field and field.comments: 5540 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5541 5542 this = self._parse_bracket(this) 5543 5544 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5545 5546 def _parse_primary(self) -> t.Optional[exp.Expression]: 5547 if self._match_set(self.PRIMARY_PARSERS): 5548 token_type = self._prev.token_type 5549 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5550 5551 if token_type == TokenType.STRING: 5552 expressions = [primary] 5553 while self._match(TokenType.STRING): 5554 expressions.append(exp.Literal.string(self._prev.text)) 5555 5556 if len(expressions) > 1: 5557 return self.expression(exp.Concat, expressions=expressions) 5558 5559 return primary 5560 5561 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5562 return exp.Literal.number(f"0.{self._prev.text}") 5563 5564 if self._match(TokenType.L_PAREN): 5565 comments = self._prev_comments 5566 query = self._parse_select() 5567 5568 if query: 5569 expressions = [query] 5570 else: 5571 expressions = self._parse_expressions() 5572 5573 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5574 5575 if not this and self._match(TokenType.R_PAREN, advance=False): 5576 this = self.expression(exp.Tuple) 5577 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5578 this = self._parse_subquery(this=this, parse_alias=False) 5579 elif isinstance(this, exp.Subquery): 5580 this = self._parse_subquery( 5581 this=self._parse_set_operations(this), parse_alias=False 5582 ) 5583 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5584 this = self.expression(exp.Tuple, expressions=expressions) 5585 else: 5586 this = self.expression(exp.Paren, this=this) 5587 5588 if this: 5589 this.add_comments(comments) 5590 5591 self._match_r_paren(expression=this) 5592 return this 5593 5594 return None 5595 5596 def _parse_field( 5597 self, 5598 any_token: bool = False, 5599 tokens: t.Optional[t.Collection[TokenType]] = None, 5600 anonymous_func: bool = False, 5601 ) -> t.Optional[exp.Expression]: 5602 if anonymous_func: 5603 field = ( 5604 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5605 or self._parse_primary() 5606 ) 5607 else: 5608 field = self._parse_primary() or self._parse_function( 5609 anonymous=anonymous_func, any_token=any_token 5610 ) 5611 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5612 5613 def _parse_function( 5614 self, 5615 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5616 anonymous: bool = False, 5617 optional_parens: bool = True, 5618 any_token: bool = False, 5619 ) -> t.Optional[exp.Expression]: 5620 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5621 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5622 fn_syntax = False 5623 if ( 5624 self._match(TokenType.L_BRACE, advance=False) 5625 and self._next 5626 and self._next.text.upper() == "FN" 5627 ): 5628 self._advance(2) 5629 fn_syntax = True 5630 5631 func = self._parse_function_call( 5632 functions=functions, 5633 anonymous=anonymous, 5634 optional_parens=optional_parens, 5635 any_token=any_token, 5636 ) 5637 5638 if fn_syntax: 5639 self._match(TokenType.R_BRACE) 5640 5641 return func 5642 5643 def _parse_function_call( 5644 self, 5645 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5646 anonymous: bool = False, 5647 optional_parens: bool = True, 5648 any_token: bool = False, 5649 ) -> t.Optional[exp.Expression]: 5650 if not self._curr: 5651 return None 5652 5653 comments = self._curr.comments 5654 token = self._curr 5655 token_type = self._curr.token_type 5656 this = self._curr.text 5657 upper = this.upper() 5658 5659 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5660 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5661 self._advance() 5662 return self._parse_window(parser(self)) 5663 5664 if not self._next or self._next.token_type != TokenType.L_PAREN: 5665 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5666 self._advance() 5667 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5668 5669 return None 5670 5671 if any_token: 5672 if token_type in self.RESERVED_TOKENS: 5673 return None 5674 elif token_type not in self.FUNC_TOKENS: 5675 return None 5676 5677 self._advance(2) 5678 5679 parser = self.FUNCTION_PARSERS.get(upper) 5680 if parser and not anonymous: 5681 this = parser(self) 5682 else: 5683 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5684 5685 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5686 this = self.expression( 5687 subquery_predicate, comments=comments, this=self._parse_select() 5688 ) 5689 self._match_r_paren() 5690 return this 5691 5692 if functions is None: 5693 functions = self.FUNCTIONS 5694 5695 function = functions.get(upper) 5696 known_function = function and not anonymous 5697 5698 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5699 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5700 5701 post_func_comments = self._curr and self._curr.comments 5702 if known_function and post_func_comments: 5703 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5704 # call we'll construct it as exp.Anonymous, even if it's "known" 5705 if any( 5706 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5707 for comment in post_func_comments 5708 ): 5709 known_function = False 5710 5711 if alias and known_function: 5712 args = self._kv_to_prop_eq(args) 5713 5714 if known_function: 5715 func_builder = t.cast(t.Callable, function) 5716 5717 if "dialect" in func_builder.__code__.co_varnames: 5718 func = func_builder(args, dialect=self.dialect) 5719 else: 5720 func = func_builder(args) 5721 5722 func = self.validate_expression(func, args) 5723 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5724 func.meta["name"] = this 5725 5726 this = func 5727 else: 5728 if token_type == TokenType.IDENTIFIER: 5729 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5730 5731 this = self.expression(exp.Anonymous, this=this, expressions=args) 5732 this = this.update_positions(token) 5733 5734 if isinstance(this, exp.Expression): 5735 this.add_comments(comments) 5736 5737 self._match_r_paren(this) 5738 return self._parse_window(this) 5739 5740 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5741 return expression 5742 5743 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5744 transformed = [] 5745 5746 for index, e in enumerate(expressions): 5747 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5748 if isinstance(e, exp.Alias): 5749 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5750 5751 if not isinstance(e, exp.PropertyEQ): 5752 e = self.expression( 5753 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5754 ) 5755 5756 if isinstance(e.this, exp.Column): 5757 e.this.replace(e.this.this) 5758 else: 5759 e = self._to_prop_eq(e, index) 5760 5761 transformed.append(e) 5762 5763 return transformed 5764 5765 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5766 return self._parse_statement() 5767 5768 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5769 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5770 5771 def _parse_user_defined_function( 5772 self, kind: t.Optional[TokenType] = None 5773 ) -> t.Optional[exp.Expression]: 5774 this = self._parse_table_parts(schema=True) 5775 5776 if not self._match(TokenType.L_PAREN): 5777 return this 5778 5779 expressions = self._parse_csv(self._parse_function_parameter) 5780 self._match_r_paren() 5781 return self.expression( 5782 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5783 ) 5784 5785 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5786 literal = self._parse_primary() 5787 if literal: 5788 return self.expression(exp.Introducer, this=token.text, expression=literal) 5789 5790 return self._identifier_expression(token) 5791 5792 def _parse_session_parameter(self) -> exp.SessionParameter: 5793 kind = None 5794 this = self._parse_id_var() or self._parse_primary() 5795 5796 if this and self._match(TokenType.DOT): 5797 kind = this.name 5798 this = self._parse_var() or self._parse_primary() 5799 5800 return self.expression(exp.SessionParameter, this=this, kind=kind) 5801 5802 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5803 return self._parse_id_var() 5804 5805 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5806 index = self._index 5807 5808 if self._match(TokenType.L_PAREN): 5809 expressions = t.cast( 5810 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5811 ) 5812 5813 if not self._match(TokenType.R_PAREN): 5814 self._retreat(index) 5815 else: 5816 expressions = [self._parse_lambda_arg()] 5817 5818 if self._match_set(self.LAMBDAS): 5819 return self.LAMBDAS[self._prev.token_type](self, expressions) 5820 5821 self._retreat(index) 5822 5823 this: t.Optional[exp.Expression] 5824 5825 if self._match(TokenType.DISTINCT): 5826 this = self.expression( 5827 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5828 ) 5829 else: 5830 this = self._parse_select_or_expression(alias=alias) 5831 5832 return self._parse_limit( 5833 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5834 ) 5835 5836 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5837 index = self._index 5838 if not self._match(TokenType.L_PAREN): 5839 return this 5840 5841 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5842 # expr can be of both types 5843 if self._match_set(self.SELECT_START_TOKENS): 5844 self._retreat(index) 5845 return this 5846 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5847 self._match_r_paren() 5848 return self.expression(exp.Schema, this=this, expressions=args) 5849 5850 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5851 return self._parse_column_def(self._parse_field(any_token=True)) 5852 5853 def _parse_column_def( 5854 self, this: t.Optional[exp.Expression], computed_column: bool = True 5855 ) -> t.Optional[exp.Expression]: 5856 # column defs are not really columns, they're identifiers 5857 if isinstance(this, exp.Column): 5858 this = this.this 5859 5860 if not computed_column: 5861 self._match(TokenType.ALIAS) 5862 5863 kind = self._parse_types(schema=True) 5864 5865 if self._match_text_seq("FOR", "ORDINALITY"): 5866 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5867 5868 constraints: t.List[exp.Expression] = [] 5869 5870 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5871 ("ALIAS", "MATERIALIZED") 5872 ): 5873 persisted = self._prev.text.upper() == "MATERIALIZED" 5874 constraint_kind = exp.ComputedColumnConstraint( 5875 this=self._parse_assignment(), 5876 persisted=persisted or self._match_text_seq("PERSISTED"), 5877 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5878 ) 5879 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5880 elif ( 5881 kind 5882 and self._match(TokenType.ALIAS, advance=False) 5883 and ( 5884 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5885 or (self._next and self._next.token_type == TokenType.L_PAREN) 5886 ) 5887 ): 5888 self._advance() 5889 constraints.append( 5890 self.expression( 5891 exp.ColumnConstraint, 5892 kind=exp.ComputedColumnConstraint( 5893 this=self._parse_disjunction(), 5894 persisted=self._match_texts(("STORED", "VIRTUAL")) 5895 and self._prev.text.upper() == "STORED", 5896 ), 5897 ) 5898 ) 5899 5900 while True: 5901 constraint = self._parse_column_constraint() 5902 if not constraint: 5903 break 5904 constraints.append(constraint) 5905 5906 if not kind and not constraints: 5907 return this 5908 5909 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5910 5911 def _parse_auto_increment( 5912 self, 5913 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5914 start = None 5915 increment = None 5916 5917 if self._match(TokenType.L_PAREN, advance=False): 5918 args = self._parse_wrapped_csv(self._parse_bitwise) 5919 start = seq_get(args, 0) 5920 increment = seq_get(args, 1) 5921 elif self._match_text_seq("START"): 5922 start = self._parse_bitwise() 5923 self._match_text_seq("INCREMENT") 5924 increment = self._parse_bitwise() 5925 5926 if start and increment: 5927 return exp.GeneratedAsIdentityColumnConstraint( 5928 start=start, increment=increment, this=False 5929 ) 5930 5931 return exp.AutoIncrementColumnConstraint() 5932 5933 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5934 if not self._match_text_seq("REFRESH"): 5935 self._retreat(self._index - 1) 5936 return None 5937 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5938 5939 def _parse_compress(self) -> exp.CompressColumnConstraint: 5940 if self._match(TokenType.L_PAREN, advance=False): 5941 return self.expression( 5942 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5943 ) 5944 5945 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5946 5947 def _parse_generated_as_identity( 5948 self, 5949 ) -> ( 5950 exp.GeneratedAsIdentityColumnConstraint 5951 | exp.ComputedColumnConstraint 5952 | exp.GeneratedAsRowColumnConstraint 5953 ): 5954 if self._match_text_seq("BY", "DEFAULT"): 5955 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5956 this = self.expression( 5957 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5958 ) 5959 else: 5960 self._match_text_seq("ALWAYS") 5961 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5962 5963 self._match(TokenType.ALIAS) 5964 5965 if self._match_text_seq("ROW"): 5966 start = self._match_text_seq("START") 5967 if not start: 5968 self._match(TokenType.END) 5969 hidden = self._match_text_seq("HIDDEN") 5970 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5971 5972 identity = self._match_text_seq("IDENTITY") 5973 5974 if self._match(TokenType.L_PAREN): 5975 if self._match(TokenType.START_WITH): 5976 this.set("start", self._parse_bitwise()) 5977 if self._match_text_seq("INCREMENT", "BY"): 5978 this.set("increment", self._parse_bitwise()) 5979 if self._match_text_seq("MINVALUE"): 5980 this.set("minvalue", self._parse_bitwise()) 5981 if self._match_text_seq("MAXVALUE"): 5982 this.set("maxvalue", self._parse_bitwise()) 5983 5984 if self._match_text_seq("CYCLE"): 5985 this.set("cycle", True) 5986 elif self._match_text_seq("NO", "CYCLE"): 5987 this.set("cycle", False) 5988 5989 if not identity: 5990 this.set("expression", self._parse_range()) 5991 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5992 args = self._parse_csv(self._parse_bitwise) 5993 this.set("start", seq_get(args, 0)) 5994 this.set("increment", seq_get(args, 1)) 5995 5996 self._match_r_paren() 5997 5998 return this 5999 6000 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6001 self._match_text_seq("LENGTH") 6002 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6003 6004 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6005 if self._match_text_seq("NULL"): 6006 return self.expression(exp.NotNullColumnConstraint) 6007 if self._match_text_seq("CASESPECIFIC"): 6008 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6009 if self._match_text_seq("FOR", "REPLICATION"): 6010 return self.expression(exp.NotForReplicationColumnConstraint) 6011 6012 # Unconsume the `NOT` token 6013 self._retreat(self._index - 1) 6014 return None 6015 6016 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6017 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6018 6019 procedure_option_follows = ( 6020 self._match(TokenType.WITH, advance=False) 6021 and self._next 6022 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6023 ) 6024 6025 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6026 return self.expression( 6027 exp.ColumnConstraint, 6028 this=this, 6029 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6030 ) 6031 6032 return this 6033 6034 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6035 if not self._match(TokenType.CONSTRAINT): 6036 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6037 6038 return self.expression( 6039 exp.Constraint, 6040 this=self._parse_id_var(), 6041 expressions=self._parse_unnamed_constraints(), 6042 ) 6043 6044 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6045 constraints = [] 6046 while True: 6047 constraint = self._parse_unnamed_constraint() or self._parse_function() 6048 if not constraint: 6049 break 6050 constraints.append(constraint) 6051 6052 return constraints 6053 6054 def _parse_unnamed_constraint( 6055 self, constraints: t.Optional[t.Collection[str]] = None 6056 ) -> t.Optional[exp.Expression]: 6057 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6058 constraints or self.CONSTRAINT_PARSERS 6059 ): 6060 return None 6061 6062 constraint = self._prev.text.upper() 6063 if constraint not in self.CONSTRAINT_PARSERS: 6064 self.raise_error(f"No parser found for schema constraint {constraint}.") 6065 6066 return self.CONSTRAINT_PARSERS[constraint](self) 6067 6068 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6069 return self._parse_id_var(any_token=False) 6070 6071 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6072 self._match_text_seq("KEY") 6073 return self.expression( 6074 exp.UniqueColumnConstraint, 6075 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6076 this=self._parse_schema(self._parse_unique_key()), 6077 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6078 on_conflict=self._parse_on_conflict(), 6079 options=self._parse_key_constraint_options(), 6080 ) 6081 6082 def _parse_key_constraint_options(self) -> t.List[str]: 6083 options = [] 6084 while True: 6085 if not self._curr: 6086 break 6087 6088 if self._match(TokenType.ON): 6089 action = None 6090 on = self._advance_any() and self._prev.text 6091 6092 if self._match_text_seq("NO", "ACTION"): 6093 action = "NO ACTION" 6094 elif self._match_text_seq("CASCADE"): 6095 action = "CASCADE" 6096 elif self._match_text_seq("RESTRICT"): 6097 action = "RESTRICT" 6098 elif self._match_pair(TokenType.SET, TokenType.NULL): 6099 action = "SET NULL" 6100 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6101 action = "SET DEFAULT" 6102 else: 6103 self.raise_error("Invalid key constraint") 6104 6105 options.append(f"ON {on} {action}") 6106 else: 6107 var = self._parse_var_from_options( 6108 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6109 ) 6110 if not var: 6111 break 6112 options.append(var.name) 6113 6114 return options 6115 6116 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6117 if match and not self._match(TokenType.REFERENCES): 6118 return None 6119 6120 expressions = None 6121 this = self._parse_table(schema=True) 6122 options = self._parse_key_constraint_options() 6123 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6124 6125 def _parse_foreign_key(self) -> exp.ForeignKey: 6126 expressions = ( 6127 self._parse_wrapped_id_vars() 6128 if not self._match(TokenType.REFERENCES, advance=False) 6129 else None 6130 ) 6131 reference = self._parse_references() 6132 on_options = {} 6133 6134 while self._match(TokenType.ON): 6135 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6136 self.raise_error("Expected DELETE or UPDATE") 6137 6138 kind = self._prev.text.lower() 6139 6140 if self._match_text_seq("NO", "ACTION"): 6141 action = "NO ACTION" 6142 elif self._match(TokenType.SET): 6143 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6144 action = "SET " + self._prev.text.upper() 6145 else: 6146 self._advance() 6147 action = self._prev.text.upper() 6148 6149 on_options[kind] = action 6150 6151 return self.expression( 6152 exp.ForeignKey, 6153 expressions=expressions, 6154 reference=reference, 6155 options=self._parse_key_constraint_options(), 6156 **on_options, # type: ignore 6157 ) 6158 6159 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6160 return self._parse_ordered() or self._parse_field() 6161 6162 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6163 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6164 self._retreat(self._index - 1) 6165 return None 6166 6167 id_vars = self._parse_wrapped_id_vars() 6168 return self.expression( 6169 exp.PeriodForSystemTimeConstraint, 6170 this=seq_get(id_vars, 0), 6171 expression=seq_get(id_vars, 1), 6172 ) 6173 6174 def _parse_primary_key( 6175 self, wrapped_optional: bool = False, in_props: bool = False 6176 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6177 desc = ( 6178 self._match_set((TokenType.ASC, TokenType.DESC)) 6179 and self._prev.token_type == TokenType.DESC 6180 ) 6181 6182 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6183 return self.expression( 6184 exp.PrimaryKeyColumnConstraint, 6185 desc=desc, 6186 options=self._parse_key_constraint_options(), 6187 ) 6188 6189 expressions = self._parse_wrapped_csv( 6190 self._parse_primary_key_part, optional=wrapped_optional 6191 ) 6192 options = self._parse_key_constraint_options() 6193 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6194 6195 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6196 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6197 6198 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6199 """ 6200 Parses a datetime column in ODBC format. We parse the column into the corresponding 6201 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6202 same as we did for `DATE('yyyy-mm-dd')`. 6203 6204 Reference: 6205 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6206 """ 6207 self._match(TokenType.VAR) 6208 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6209 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6210 if not self._match(TokenType.R_BRACE): 6211 self.raise_error("Expected }") 6212 return expression 6213 6214 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6215 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6216 return this 6217 6218 bracket_kind = self._prev.token_type 6219 if ( 6220 bracket_kind == TokenType.L_BRACE 6221 and self._curr 6222 and self._curr.token_type == TokenType.VAR 6223 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6224 ): 6225 return self._parse_odbc_datetime_literal() 6226 6227 expressions = self._parse_csv( 6228 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6229 ) 6230 6231 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6232 self.raise_error("Expected ]") 6233 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6234 self.raise_error("Expected }") 6235 6236 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6237 if bracket_kind == TokenType.L_BRACE: 6238 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6239 elif not this: 6240 this = build_array_constructor( 6241 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6242 ) 6243 else: 6244 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6245 if constructor_type: 6246 return build_array_constructor( 6247 constructor_type, 6248 args=expressions, 6249 bracket_kind=bracket_kind, 6250 dialect=self.dialect, 6251 ) 6252 6253 expressions = apply_index_offset( 6254 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6255 ) 6256 this = self.expression( 6257 exp.Bracket, 6258 this=this, 6259 expressions=expressions, 6260 comments=this.pop_comments(), 6261 ) 6262 6263 self._add_comments(this) 6264 return self._parse_bracket(this) 6265 6266 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6267 if self._match(TokenType.COLON): 6268 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6269 return this 6270 6271 def _parse_case(self) -> t.Optional[exp.Expression]: 6272 ifs = [] 6273 default = None 6274 6275 comments = self._prev_comments 6276 expression = self._parse_assignment() 6277 6278 while self._match(TokenType.WHEN): 6279 this = self._parse_assignment() 6280 self._match(TokenType.THEN) 6281 then = self._parse_assignment() 6282 ifs.append(self.expression(exp.If, this=this, true=then)) 6283 6284 if self._match(TokenType.ELSE): 6285 default = self._parse_assignment() 6286 6287 if not self._match(TokenType.END): 6288 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6289 default = exp.column("interval") 6290 else: 6291 self.raise_error("Expected END after CASE", self._prev) 6292 6293 return self.expression( 6294 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6295 ) 6296 6297 def _parse_if(self) -> t.Optional[exp.Expression]: 6298 if self._match(TokenType.L_PAREN): 6299 args = self._parse_csv( 6300 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6301 ) 6302 this = self.validate_expression(exp.If.from_arg_list(args), args) 6303 self._match_r_paren() 6304 else: 6305 index = self._index - 1 6306 6307 if self.NO_PAREN_IF_COMMANDS and index == 0: 6308 return self._parse_as_command(self._prev) 6309 6310 condition = self._parse_assignment() 6311 6312 if not condition: 6313 self._retreat(index) 6314 return None 6315 6316 self._match(TokenType.THEN) 6317 true = self._parse_assignment() 6318 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6319 self._match(TokenType.END) 6320 this = self.expression(exp.If, this=condition, true=true, false=false) 6321 6322 return this 6323 6324 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6325 if not self._match_text_seq("VALUE", "FOR"): 6326 self._retreat(self._index - 1) 6327 return None 6328 6329 return self.expression( 6330 exp.NextValueFor, 6331 this=self._parse_column(), 6332 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6333 ) 6334 6335 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6336 this = self._parse_function() or self._parse_var_or_string(upper=True) 6337 6338 if self._match(TokenType.FROM): 6339 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6340 6341 if not self._match(TokenType.COMMA): 6342 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6343 6344 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6345 6346 def _parse_gap_fill(self) -> exp.GapFill: 6347 self._match(TokenType.TABLE) 6348 this = self._parse_table() 6349 6350 self._match(TokenType.COMMA) 6351 args = [this, *self._parse_csv(self._parse_lambda)] 6352 6353 gap_fill = exp.GapFill.from_arg_list(args) 6354 return self.validate_expression(gap_fill, args) 6355 6356 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6357 this = self._parse_assignment() 6358 6359 if not self._match(TokenType.ALIAS): 6360 if self._match(TokenType.COMMA): 6361 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6362 6363 self.raise_error("Expected AS after CAST") 6364 6365 fmt = None 6366 to = self._parse_types() 6367 6368 default = self._match(TokenType.DEFAULT) 6369 if default: 6370 default = self._parse_bitwise() 6371 self._match_text_seq("ON", "CONVERSION", "ERROR") 6372 6373 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6374 fmt_string = self._parse_string() 6375 fmt = self._parse_at_time_zone(fmt_string) 6376 6377 if not to: 6378 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6379 if to.this in exp.DataType.TEMPORAL_TYPES: 6380 this = self.expression( 6381 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6382 this=this, 6383 format=exp.Literal.string( 6384 format_time( 6385 fmt_string.this if fmt_string else "", 6386 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6387 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6388 ) 6389 ), 6390 safe=safe, 6391 ) 6392 6393 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6394 this.set("zone", fmt.args["zone"]) 6395 return this 6396 elif not to: 6397 self.raise_error("Expected TYPE after CAST") 6398 elif isinstance(to, exp.Identifier): 6399 to = exp.DataType.build(to.name, udt=True) 6400 elif to.this == exp.DataType.Type.CHAR: 6401 if self._match(TokenType.CHARACTER_SET): 6402 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6403 6404 return self.expression( 6405 exp.Cast if strict else exp.TryCast, 6406 this=this, 6407 to=to, 6408 format=fmt, 6409 safe=safe, 6410 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6411 default=default, 6412 ) 6413 6414 def _parse_string_agg(self) -> exp.GroupConcat: 6415 if self._match(TokenType.DISTINCT): 6416 args: t.List[t.Optional[exp.Expression]] = [ 6417 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6418 ] 6419 if self._match(TokenType.COMMA): 6420 args.extend(self._parse_csv(self._parse_assignment)) 6421 else: 6422 args = self._parse_csv(self._parse_assignment) # type: ignore 6423 6424 if self._match_text_seq("ON", "OVERFLOW"): 6425 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6426 if self._match_text_seq("ERROR"): 6427 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6428 else: 6429 self._match_text_seq("TRUNCATE") 6430 on_overflow = self.expression( 6431 exp.OverflowTruncateBehavior, 6432 this=self._parse_string(), 6433 with_count=( 6434 self._match_text_seq("WITH", "COUNT") 6435 or not self._match_text_seq("WITHOUT", "COUNT") 6436 ), 6437 ) 6438 else: 6439 on_overflow = None 6440 6441 index = self._index 6442 if not self._match(TokenType.R_PAREN) and args: 6443 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6444 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6445 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6446 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6447 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6448 6449 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6450 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6451 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6452 if not self._match_text_seq("WITHIN", "GROUP"): 6453 self._retreat(index) 6454 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6455 6456 # The corresponding match_r_paren will be called in parse_function (caller) 6457 self._match_l_paren() 6458 6459 return self.expression( 6460 exp.GroupConcat, 6461 this=self._parse_order(this=seq_get(args, 0)), 6462 separator=seq_get(args, 1), 6463 on_overflow=on_overflow, 6464 ) 6465 6466 def _parse_convert( 6467 self, strict: bool, safe: t.Optional[bool] = None 6468 ) -> t.Optional[exp.Expression]: 6469 this = self._parse_bitwise() 6470 6471 if self._match(TokenType.USING): 6472 to: t.Optional[exp.Expression] = self.expression( 6473 exp.CharacterSet, this=self._parse_var() 6474 ) 6475 elif self._match(TokenType.COMMA): 6476 to = self._parse_types() 6477 else: 6478 to = None 6479 6480 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6481 6482 def _parse_xml_table(self) -> exp.XMLTable: 6483 namespaces = None 6484 passing = None 6485 columns = None 6486 6487 if self._match_text_seq("XMLNAMESPACES", "("): 6488 namespaces = self._parse_xml_namespace() 6489 self._match_text_seq(")", ",") 6490 6491 this = self._parse_string() 6492 6493 if self._match_text_seq("PASSING"): 6494 # The BY VALUE keywords are optional and are provided for semantic clarity 6495 self._match_text_seq("BY", "VALUE") 6496 passing = self._parse_csv(self._parse_column) 6497 6498 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6499 6500 if self._match_text_seq("COLUMNS"): 6501 columns = self._parse_csv(self._parse_field_def) 6502 6503 return self.expression( 6504 exp.XMLTable, 6505 this=this, 6506 namespaces=namespaces, 6507 passing=passing, 6508 columns=columns, 6509 by_ref=by_ref, 6510 ) 6511 6512 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6513 namespaces = [] 6514 6515 while True: 6516 if self._match(TokenType.DEFAULT): 6517 uri = self._parse_string() 6518 else: 6519 uri = self._parse_alias(self._parse_string()) 6520 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6521 if not self._match(TokenType.COMMA): 6522 break 6523 6524 return namespaces 6525 6526 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6527 """ 6528 There are generally two variants of the DECODE function: 6529 6530 - DECODE(bin, charset) 6531 - DECODE(expression, search, result [, search, result] ... [, default]) 6532 6533 The second variant will always be parsed into a CASE expression. Note that NULL 6534 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6535 instead of relying on pattern matching. 6536 """ 6537 args = self._parse_csv(self._parse_assignment) 6538 6539 if len(args) < 3: 6540 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6541 6542 expression, *expressions = args 6543 if not expression: 6544 return None 6545 6546 ifs = [] 6547 for search, result in zip(expressions[::2], expressions[1::2]): 6548 if not search or not result: 6549 return None 6550 6551 if isinstance(search, exp.Literal): 6552 ifs.append( 6553 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6554 ) 6555 elif isinstance(search, exp.Null): 6556 ifs.append( 6557 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6558 ) 6559 else: 6560 cond = exp.or_( 6561 exp.EQ(this=expression.copy(), expression=search), 6562 exp.and_( 6563 exp.Is(this=expression.copy(), expression=exp.Null()), 6564 exp.Is(this=search.copy(), expression=exp.Null()), 6565 copy=False, 6566 ), 6567 copy=False, 6568 ) 6569 ifs.append(exp.If(this=cond, true=result)) 6570 6571 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6572 6573 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6574 self._match_text_seq("KEY") 6575 key = self._parse_column() 6576 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6577 self._match_text_seq("VALUE") 6578 value = self._parse_bitwise() 6579 6580 if not key and not value: 6581 return None 6582 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6583 6584 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6585 if not this or not self._match_text_seq("FORMAT", "JSON"): 6586 return this 6587 6588 return self.expression(exp.FormatJson, this=this) 6589 6590 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6591 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6592 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6593 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6594 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6595 else: 6596 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6597 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6598 6599 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6600 6601 if not empty and not error and not null: 6602 return None 6603 6604 return self.expression( 6605 exp.OnCondition, 6606 empty=empty, 6607 error=error, 6608 null=null, 6609 ) 6610 6611 def _parse_on_handling( 6612 self, on: str, *values: str 6613 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6614 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6615 for value in values: 6616 if self._match_text_seq(value, "ON", on): 6617 return f"{value} ON {on}" 6618 6619 index = self._index 6620 if self._match(TokenType.DEFAULT): 6621 default_value = self._parse_bitwise() 6622 if self._match_text_seq("ON", on): 6623 return default_value 6624 6625 self._retreat(index) 6626 6627 return None 6628 6629 @t.overload 6630 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6631 6632 @t.overload 6633 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6634 6635 def _parse_json_object(self, agg=False): 6636 star = self._parse_star() 6637 expressions = ( 6638 [star] 6639 if star 6640 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6641 ) 6642 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6643 6644 unique_keys = None 6645 if self._match_text_seq("WITH", "UNIQUE"): 6646 unique_keys = True 6647 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6648 unique_keys = False 6649 6650 self._match_text_seq("KEYS") 6651 6652 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6653 self._parse_type() 6654 ) 6655 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6656 6657 return self.expression( 6658 exp.JSONObjectAgg if agg else exp.JSONObject, 6659 expressions=expressions, 6660 null_handling=null_handling, 6661 unique_keys=unique_keys, 6662 return_type=return_type, 6663 encoding=encoding, 6664 ) 6665 6666 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6667 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6668 if not self._match_text_seq("NESTED"): 6669 this = self._parse_id_var() 6670 kind = self._parse_types(allow_identifiers=False) 6671 nested = None 6672 else: 6673 this = None 6674 kind = None 6675 nested = True 6676 6677 path = self._match_text_seq("PATH") and self._parse_string() 6678 nested_schema = nested and self._parse_json_schema() 6679 6680 return self.expression( 6681 exp.JSONColumnDef, 6682 this=this, 6683 kind=kind, 6684 path=path, 6685 nested_schema=nested_schema, 6686 ) 6687 6688 def _parse_json_schema(self) -> exp.JSONSchema: 6689 self._match_text_seq("COLUMNS") 6690 return self.expression( 6691 exp.JSONSchema, 6692 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6693 ) 6694 6695 def _parse_json_table(self) -> exp.JSONTable: 6696 this = self._parse_format_json(self._parse_bitwise()) 6697 path = self._match(TokenType.COMMA) and self._parse_string() 6698 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6699 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6700 schema = self._parse_json_schema() 6701 6702 return exp.JSONTable( 6703 this=this, 6704 schema=schema, 6705 path=path, 6706 error_handling=error_handling, 6707 empty_handling=empty_handling, 6708 ) 6709 6710 def _parse_match_against(self) -> exp.MatchAgainst: 6711 expressions = self._parse_csv(self._parse_column) 6712 6713 self._match_text_seq(")", "AGAINST", "(") 6714 6715 this = self._parse_string() 6716 6717 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6718 modifier = "IN NATURAL LANGUAGE MODE" 6719 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6720 modifier = f"{modifier} WITH QUERY EXPANSION" 6721 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6722 modifier = "IN BOOLEAN MODE" 6723 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6724 modifier = "WITH QUERY EXPANSION" 6725 else: 6726 modifier = None 6727 6728 return self.expression( 6729 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6730 ) 6731 6732 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6733 def _parse_open_json(self) -> exp.OpenJSON: 6734 this = self._parse_bitwise() 6735 path = self._match(TokenType.COMMA) and self._parse_string() 6736 6737 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6738 this = self._parse_field(any_token=True) 6739 kind = self._parse_types() 6740 path = self._parse_string() 6741 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6742 6743 return self.expression( 6744 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6745 ) 6746 6747 expressions = None 6748 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6749 self._match_l_paren() 6750 expressions = self._parse_csv(_parse_open_json_column_def) 6751 6752 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6753 6754 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6755 args = self._parse_csv(self._parse_bitwise) 6756 6757 if self._match(TokenType.IN): 6758 return self.expression( 6759 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6760 ) 6761 6762 if haystack_first: 6763 haystack = seq_get(args, 0) 6764 needle = seq_get(args, 1) 6765 else: 6766 haystack = seq_get(args, 1) 6767 needle = seq_get(args, 0) 6768 6769 return self.expression( 6770 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6771 ) 6772 6773 def _parse_predict(self) -> exp.Predict: 6774 self._match_text_seq("MODEL") 6775 this = self._parse_table() 6776 6777 self._match(TokenType.COMMA) 6778 self._match_text_seq("TABLE") 6779 6780 return self.expression( 6781 exp.Predict, 6782 this=this, 6783 expression=self._parse_table(), 6784 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6785 ) 6786 6787 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6788 args = self._parse_csv(self._parse_table) 6789 return exp.JoinHint(this=func_name.upper(), expressions=args) 6790 6791 def _parse_substring(self) -> exp.Substring: 6792 # Postgres supports the form: substring(string [from int] [for int]) 6793 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6794 6795 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6796 6797 if self._match(TokenType.FROM): 6798 args.append(self._parse_bitwise()) 6799 if self._match(TokenType.FOR): 6800 if len(args) == 1: 6801 args.append(exp.Literal.number(1)) 6802 args.append(self._parse_bitwise()) 6803 6804 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6805 6806 def _parse_trim(self) -> exp.Trim: 6807 # https://www.w3resource.com/sql/character-functions/trim.php 6808 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6809 6810 position = None 6811 collation = None 6812 expression = None 6813 6814 if self._match_texts(self.TRIM_TYPES): 6815 position = self._prev.text.upper() 6816 6817 this = self._parse_bitwise() 6818 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6819 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6820 expression = self._parse_bitwise() 6821 6822 if invert_order: 6823 this, expression = expression, this 6824 6825 if self._match(TokenType.COLLATE): 6826 collation = self._parse_bitwise() 6827 6828 return self.expression( 6829 exp.Trim, this=this, position=position, expression=expression, collation=collation 6830 ) 6831 6832 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6833 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6834 6835 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6836 return self._parse_window(self._parse_id_var(), alias=True) 6837 6838 def _parse_respect_or_ignore_nulls( 6839 self, this: t.Optional[exp.Expression] 6840 ) -> t.Optional[exp.Expression]: 6841 if self._match_text_seq("IGNORE", "NULLS"): 6842 return self.expression(exp.IgnoreNulls, this=this) 6843 if self._match_text_seq("RESPECT", "NULLS"): 6844 return self.expression(exp.RespectNulls, this=this) 6845 return this 6846 6847 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6848 if self._match(TokenType.HAVING): 6849 self._match_texts(("MAX", "MIN")) 6850 max = self._prev.text.upper() != "MIN" 6851 return self.expression( 6852 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6853 ) 6854 6855 return this 6856 6857 def _parse_window( 6858 self, this: t.Optional[exp.Expression], alias: bool = False 6859 ) -> t.Optional[exp.Expression]: 6860 func = this 6861 comments = func.comments if isinstance(func, exp.Expression) else None 6862 6863 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6864 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6865 if self._match_text_seq("WITHIN", "GROUP"): 6866 order = self._parse_wrapped(self._parse_order) 6867 this = self.expression(exp.WithinGroup, this=this, expression=order) 6868 6869 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6870 self._match(TokenType.WHERE) 6871 this = self.expression( 6872 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6873 ) 6874 self._match_r_paren() 6875 6876 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6877 # Some dialects choose to implement and some do not. 6878 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6879 6880 # There is some code above in _parse_lambda that handles 6881 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6882 6883 # The below changes handle 6884 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6885 6886 # Oracle allows both formats 6887 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6888 # and Snowflake chose to do the same for familiarity 6889 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6890 if isinstance(this, exp.AggFunc): 6891 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6892 6893 if ignore_respect and ignore_respect is not this: 6894 ignore_respect.replace(ignore_respect.this) 6895 this = self.expression(ignore_respect.__class__, this=this) 6896 6897 this = self._parse_respect_or_ignore_nulls(this) 6898 6899 # bigquery select from window x AS (partition by ...) 6900 if alias: 6901 over = None 6902 self._match(TokenType.ALIAS) 6903 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6904 return this 6905 else: 6906 over = self._prev.text.upper() 6907 6908 if comments and isinstance(func, exp.Expression): 6909 func.pop_comments() 6910 6911 if not self._match(TokenType.L_PAREN): 6912 return self.expression( 6913 exp.Window, 6914 comments=comments, 6915 this=this, 6916 alias=self._parse_id_var(False), 6917 over=over, 6918 ) 6919 6920 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6921 6922 first = self._match(TokenType.FIRST) 6923 if self._match_text_seq("LAST"): 6924 first = False 6925 6926 partition, order = self._parse_partition_and_order() 6927 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6928 6929 if kind: 6930 self._match(TokenType.BETWEEN) 6931 start = self._parse_window_spec() 6932 self._match(TokenType.AND) 6933 end = self._parse_window_spec() 6934 exclude = ( 6935 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6936 if self._match_text_seq("EXCLUDE") 6937 else None 6938 ) 6939 6940 spec = self.expression( 6941 exp.WindowSpec, 6942 kind=kind, 6943 start=start["value"], 6944 start_side=start["side"], 6945 end=end["value"], 6946 end_side=end["side"], 6947 exclude=exclude, 6948 ) 6949 else: 6950 spec = None 6951 6952 self._match_r_paren() 6953 6954 window = self.expression( 6955 exp.Window, 6956 comments=comments, 6957 this=this, 6958 partition_by=partition, 6959 order=order, 6960 spec=spec, 6961 alias=window_alias, 6962 over=over, 6963 first=first, 6964 ) 6965 6966 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6967 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6968 return self._parse_window(window, alias=alias) 6969 6970 return window 6971 6972 def _parse_partition_and_order( 6973 self, 6974 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6975 return self._parse_partition_by(), self._parse_order() 6976 6977 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6978 self._match(TokenType.BETWEEN) 6979 6980 return { 6981 "value": ( 6982 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6983 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6984 or self._parse_bitwise() 6985 ), 6986 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6987 } 6988 6989 def _parse_alias( 6990 self, this: t.Optional[exp.Expression], explicit: bool = False 6991 ) -> t.Optional[exp.Expression]: 6992 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6993 # so this section tries to parse the clause version and if it fails, it treats the token 6994 # as an identifier (alias) 6995 if self._can_parse_limit_or_offset(): 6996 return this 6997 6998 any_token = self._match(TokenType.ALIAS) 6999 comments = self._prev_comments or [] 7000 7001 if explicit and not any_token: 7002 return this 7003 7004 if self._match(TokenType.L_PAREN): 7005 aliases = self.expression( 7006 exp.Aliases, 7007 comments=comments, 7008 this=this, 7009 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7010 ) 7011 self._match_r_paren(aliases) 7012 return aliases 7013 7014 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7015 self.STRING_ALIASES and self._parse_string_as_identifier() 7016 ) 7017 7018 if alias: 7019 comments.extend(alias.pop_comments()) 7020 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7021 column = this.this 7022 7023 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7024 if not this.comments and column and column.comments: 7025 this.comments = column.pop_comments() 7026 7027 return this 7028 7029 def _parse_id_var( 7030 self, 7031 any_token: bool = True, 7032 tokens: t.Optional[t.Collection[TokenType]] = None, 7033 ) -> t.Optional[exp.Expression]: 7034 expression = self._parse_identifier() 7035 if not expression and ( 7036 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7037 ): 7038 quoted = self._prev.token_type == TokenType.STRING 7039 expression = self._identifier_expression(quoted=quoted) 7040 7041 return expression 7042 7043 def _parse_string(self) -> t.Optional[exp.Expression]: 7044 if self._match_set(self.STRING_PARSERS): 7045 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7046 return self._parse_placeholder() 7047 7048 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7049 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7050 if output: 7051 output.update_positions(self._prev) 7052 return output 7053 7054 def _parse_number(self) -> t.Optional[exp.Expression]: 7055 if self._match_set(self.NUMERIC_PARSERS): 7056 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7057 return self._parse_placeholder() 7058 7059 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7060 if self._match(TokenType.IDENTIFIER): 7061 return self._identifier_expression(quoted=True) 7062 return self._parse_placeholder() 7063 7064 def _parse_var( 7065 self, 7066 any_token: bool = False, 7067 tokens: t.Optional[t.Collection[TokenType]] = None, 7068 upper: bool = False, 7069 ) -> t.Optional[exp.Expression]: 7070 if ( 7071 (any_token and self._advance_any()) 7072 or self._match(TokenType.VAR) 7073 or (self._match_set(tokens) if tokens else False) 7074 ): 7075 return self.expression( 7076 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7077 ) 7078 return self._parse_placeholder() 7079 7080 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7081 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7082 self._advance() 7083 return self._prev 7084 return None 7085 7086 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7087 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7088 7089 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7090 return self._parse_primary() or self._parse_var(any_token=True) 7091 7092 def _parse_null(self) -> t.Optional[exp.Expression]: 7093 if self._match_set(self.NULL_TOKENS): 7094 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7095 return self._parse_placeholder() 7096 7097 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7098 if self._match(TokenType.TRUE): 7099 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7100 if self._match(TokenType.FALSE): 7101 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7102 return self._parse_placeholder() 7103 7104 def _parse_star(self) -> t.Optional[exp.Expression]: 7105 if self._match(TokenType.STAR): 7106 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7107 return self._parse_placeholder() 7108 7109 def _parse_parameter(self) -> exp.Parameter: 7110 this = self._parse_identifier() or self._parse_primary_or_var() 7111 return self.expression(exp.Parameter, this=this) 7112 7113 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7114 if self._match_set(self.PLACEHOLDER_PARSERS): 7115 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7116 if placeholder: 7117 return placeholder 7118 self._advance(-1) 7119 return None 7120 7121 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7122 if not self._match_texts(keywords): 7123 return None 7124 if self._match(TokenType.L_PAREN, advance=False): 7125 return self._parse_wrapped_csv(self._parse_expression) 7126 7127 expression = self._parse_expression() 7128 return [expression] if expression else None 7129 7130 def _parse_csv( 7131 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7132 ) -> t.List[exp.Expression]: 7133 parse_result = parse_method() 7134 items = [parse_result] if parse_result is not None else [] 7135 7136 while self._match(sep): 7137 self._add_comments(parse_result) 7138 parse_result = parse_method() 7139 if parse_result is not None: 7140 items.append(parse_result) 7141 7142 return items 7143 7144 def _parse_tokens( 7145 self, parse_method: t.Callable, expressions: t.Dict 7146 ) -> t.Optional[exp.Expression]: 7147 this = parse_method() 7148 7149 while self._match_set(expressions): 7150 this = self.expression( 7151 expressions[self._prev.token_type], 7152 this=this, 7153 comments=self._prev_comments, 7154 expression=parse_method(), 7155 ) 7156 7157 return this 7158 7159 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7160 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7161 7162 def _parse_wrapped_csv( 7163 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7164 ) -> t.List[exp.Expression]: 7165 return self._parse_wrapped( 7166 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7167 ) 7168 7169 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7170 wrapped = self._match(TokenType.L_PAREN) 7171 if not wrapped and not optional: 7172 self.raise_error("Expecting (") 7173 parse_result = parse_method() 7174 if wrapped: 7175 self._match_r_paren() 7176 return parse_result 7177 7178 def _parse_expressions(self) -> t.List[exp.Expression]: 7179 return self._parse_csv(self._parse_expression) 7180 7181 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7182 return self._parse_select() or self._parse_set_operations( 7183 self._parse_alias(self._parse_assignment(), explicit=True) 7184 if alias 7185 else self._parse_assignment() 7186 ) 7187 7188 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7189 return self._parse_query_modifiers( 7190 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7191 ) 7192 7193 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7194 this = None 7195 if self._match_texts(self.TRANSACTION_KIND): 7196 this = self._prev.text 7197 7198 self._match_texts(("TRANSACTION", "WORK")) 7199 7200 modes = [] 7201 while True: 7202 mode = [] 7203 while self._match(TokenType.VAR): 7204 mode.append(self._prev.text) 7205 7206 if mode: 7207 modes.append(" ".join(mode)) 7208 if not self._match(TokenType.COMMA): 7209 break 7210 7211 return self.expression(exp.Transaction, this=this, modes=modes) 7212 7213 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7214 chain = None 7215 savepoint = None 7216 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7217 7218 self._match_texts(("TRANSACTION", "WORK")) 7219 7220 if self._match_text_seq("TO"): 7221 self._match_text_seq("SAVEPOINT") 7222 savepoint = self._parse_id_var() 7223 7224 if self._match(TokenType.AND): 7225 chain = not self._match_text_seq("NO") 7226 self._match_text_seq("CHAIN") 7227 7228 if is_rollback: 7229 return self.expression(exp.Rollback, savepoint=savepoint) 7230 7231 return self.expression(exp.Commit, chain=chain) 7232 7233 def _parse_refresh(self) -> exp.Refresh: 7234 self._match(TokenType.TABLE) 7235 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7236 7237 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7238 if not self._prev.text.upper() == "ADD": 7239 return None 7240 7241 start = self._index 7242 self._match(TokenType.COLUMN) 7243 7244 exists_column = self._parse_exists(not_=True) 7245 expression = self._parse_field_def() 7246 7247 if not isinstance(expression, exp.ColumnDef): 7248 self._retreat(start) 7249 return None 7250 7251 expression.set("exists", exists_column) 7252 7253 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7254 if self._match_texts(("FIRST", "AFTER")): 7255 position = self._prev.text 7256 column_position = self.expression( 7257 exp.ColumnPosition, this=self._parse_column(), position=position 7258 ) 7259 expression.set("position", column_position) 7260 7261 return expression 7262 7263 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7264 drop = self._match(TokenType.DROP) and self._parse_drop() 7265 if drop and not isinstance(drop, exp.Command): 7266 drop.set("kind", drop.args.get("kind", "COLUMN")) 7267 return drop 7268 7269 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7270 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7271 return self.expression( 7272 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7273 ) 7274 7275 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7276 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7277 self._match_text_seq("ADD") 7278 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7279 return self.expression( 7280 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7281 ) 7282 7283 column_def = self._parse_add_column() 7284 if isinstance(column_def, exp.ColumnDef): 7285 return column_def 7286 7287 exists = self._parse_exists(not_=True) 7288 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7289 return self.expression( 7290 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7291 ) 7292 7293 return None 7294 7295 if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( 7296 "COLUMNS" 7297 ): 7298 schema = self._parse_schema() 7299 7300 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7301 7302 return self._parse_csv(_parse_add_alteration) 7303 7304 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7305 if self._match_texts(self.ALTER_ALTER_PARSERS): 7306 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7307 7308 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7309 # keyword after ALTER we default to parsing this statement 7310 self._match(TokenType.COLUMN) 7311 column = self._parse_field(any_token=True) 7312 7313 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7314 return self.expression(exp.AlterColumn, this=column, drop=True) 7315 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7316 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7317 if self._match(TokenType.COMMENT): 7318 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7319 if self._match_text_seq("DROP", "NOT", "NULL"): 7320 return self.expression( 7321 exp.AlterColumn, 7322 this=column, 7323 drop=True, 7324 allow_null=True, 7325 ) 7326 if self._match_text_seq("SET", "NOT", "NULL"): 7327 return self.expression( 7328 exp.AlterColumn, 7329 this=column, 7330 allow_null=False, 7331 ) 7332 7333 if self._match_text_seq("SET", "VISIBLE"): 7334 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7335 if self._match_text_seq("SET", "INVISIBLE"): 7336 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7337 7338 self._match_text_seq("SET", "DATA") 7339 self._match_text_seq("TYPE") 7340 return self.expression( 7341 exp.AlterColumn, 7342 this=column, 7343 dtype=self._parse_types(), 7344 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7345 using=self._match(TokenType.USING) and self._parse_assignment(), 7346 ) 7347 7348 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7349 if self._match_texts(("ALL", "EVEN", "AUTO")): 7350 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7351 7352 self._match_text_seq("KEY", "DISTKEY") 7353 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7354 7355 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7356 if compound: 7357 self._match_text_seq("SORTKEY") 7358 7359 if self._match(TokenType.L_PAREN, advance=False): 7360 return self.expression( 7361 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7362 ) 7363 7364 self._match_texts(("AUTO", "NONE")) 7365 return self.expression( 7366 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7367 ) 7368 7369 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7370 index = self._index - 1 7371 7372 partition_exists = self._parse_exists() 7373 if self._match(TokenType.PARTITION, advance=False): 7374 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7375 7376 self._retreat(index) 7377 return self._parse_csv(self._parse_drop_column) 7378 7379 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7380 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7381 exists = self._parse_exists() 7382 old_column = self._parse_column() 7383 to = self._match_text_seq("TO") 7384 new_column = self._parse_column() 7385 7386 if old_column is None or to is None or new_column is None: 7387 return None 7388 7389 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7390 7391 self._match_text_seq("TO") 7392 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7393 7394 def _parse_alter_table_set(self) -> exp.AlterSet: 7395 alter_set = self.expression(exp.AlterSet) 7396 7397 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7398 "TABLE", "PROPERTIES" 7399 ): 7400 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7401 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7402 alter_set.set("expressions", [self._parse_assignment()]) 7403 elif self._match_texts(("LOGGED", "UNLOGGED")): 7404 alter_set.set("option", exp.var(self._prev.text.upper())) 7405 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7406 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7407 elif self._match_text_seq("LOCATION"): 7408 alter_set.set("location", self._parse_field()) 7409 elif self._match_text_seq("ACCESS", "METHOD"): 7410 alter_set.set("access_method", self._parse_field()) 7411 elif self._match_text_seq("TABLESPACE"): 7412 alter_set.set("tablespace", self._parse_field()) 7413 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7414 alter_set.set("file_format", [self._parse_field()]) 7415 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7416 alter_set.set("file_format", self._parse_wrapped_options()) 7417 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7418 alter_set.set("copy_options", self._parse_wrapped_options()) 7419 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7420 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7421 else: 7422 if self._match_text_seq("SERDE"): 7423 alter_set.set("serde", self._parse_field()) 7424 7425 properties = self._parse_wrapped(self._parse_properties, optional=True) 7426 alter_set.set("expressions", [properties]) 7427 7428 return alter_set 7429 7430 def _parse_alter(self) -> exp.Alter | exp.Command: 7431 start = self._prev 7432 7433 alter_token = self._match_set(self.ALTERABLES) and self._prev 7434 if not alter_token: 7435 return self._parse_as_command(start) 7436 7437 exists = self._parse_exists() 7438 only = self._match_text_seq("ONLY") 7439 this = self._parse_table(schema=True) 7440 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7441 7442 if self._next: 7443 self._advance() 7444 7445 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7446 if parser: 7447 actions = ensure_list(parser(self)) 7448 not_valid = self._match_text_seq("NOT", "VALID") 7449 options = self._parse_csv(self._parse_property) 7450 7451 if not self._curr and actions: 7452 return self.expression( 7453 exp.Alter, 7454 this=this, 7455 kind=alter_token.text.upper(), 7456 exists=exists, 7457 actions=actions, 7458 only=only, 7459 options=options, 7460 cluster=cluster, 7461 not_valid=not_valid, 7462 ) 7463 7464 return self._parse_as_command(start) 7465 7466 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7467 start = self._prev 7468 # https://duckdb.org/docs/sql/statements/analyze 7469 if not self._curr: 7470 return self.expression(exp.Analyze) 7471 7472 options = [] 7473 while self._match_texts(self.ANALYZE_STYLES): 7474 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7475 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7476 else: 7477 options.append(self._prev.text.upper()) 7478 7479 this: t.Optional[exp.Expression] = None 7480 inner_expression: t.Optional[exp.Expression] = None 7481 7482 kind = self._curr and self._curr.text.upper() 7483 7484 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7485 this = self._parse_table_parts() 7486 elif self._match_text_seq("TABLES"): 7487 if self._match_set((TokenType.FROM, TokenType.IN)): 7488 kind = f"{kind} {self._prev.text.upper()}" 7489 this = self._parse_table(schema=True, is_db_reference=True) 7490 elif self._match_text_seq("DATABASE"): 7491 this = self._parse_table(schema=True, is_db_reference=True) 7492 elif self._match_text_seq("CLUSTER"): 7493 this = self._parse_table() 7494 # Try matching inner expr keywords before fallback to parse table. 7495 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7496 kind = None 7497 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7498 else: 7499 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7500 kind = None 7501 this = self._parse_table_parts() 7502 7503 partition = self._try_parse(self._parse_partition) 7504 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7505 return self._parse_as_command(start) 7506 7507 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7508 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7509 "WITH", "ASYNC", "MODE" 7510 ): 7511 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7512 else: 7513 mode = None 7514 7515 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7516 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7517 7518 properties = self._parse_properties() 7519 return self.expression( 7520 exp.Analyze, 7521 kind=kind, 7522 this=this, 7523 mode=mode, 7524 partition=partition, 7525 properties=properties, 7526 expression=inner_expression, 7527 options=options, 7528 ) 7529 7530 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7531 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7532 this = None 7533 kind = self._prev.text.upper() 7534 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7535 expressions = [] 7536 7537 if not self._match_text_seq("STATISTICS"): 7538 self.raise_error("Expecting token STATISTICS") 7539 7540 if self._match_text_seq("NOSCAN"): 7541 this = "NOSCAN" 7542 elif self._match(TokenType.FOR): 7543 if self._match_text_seq("ALL", "COLUMNS"): 7544 this = "FOR ALL COLUMNS" 7545 if self._match_texts("COLUMNS"): 7546 this = "FOR COLUMNS" 7547 expressions = self._parse_csv(self._parse_column_reference) 7548 elif self._match_text_seq("SAMPLE"): 7549 sample = self._parse_number() 7550 expressions = [ 7551 self.expression( 7552 exp.AnalyzeSample, 7553 sample=sample, 7554 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7555 ) 7556 ] 7557 7558 return self.expression( 7559 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7560 ) 7561 7562 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7563 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7564 kind = None 7565 this = None 7566 expression: t.Optional[exp.Expression] = None 7567 if self._match_text_seq("REF", "UPDATE"): 7568 kind = "REF" 7569 this = "UPDATE" 7570 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7571 this = "UPDATE SET DANGLING TO NULL" 7572 elif self._match_text_seq("STRUCTURE"): 7573 kind = "STRUCTURE" 7574 if self._match_text_seq("CASCADE", "FAST"): 7575 this = "CASCADE FAST" 7576 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7577 ("ONLINE", "OFFLINE") 7578 ): 7579 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7580 expression = self._parse_into() 7581 7582 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7583 7584 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7585 this = self._prev.text.upper() 7586 if self._match_text_seq("COLUMNS"): 7587 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7588 return None 7589 7590 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7591 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7592 if self._match_text_seq("STATISTICS"): 7593 return self.expression(exp.AnalyzeDelete, kind=kind) 7594 return None 7595 7596 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7597 if self._match_text_seq("CHAINED", "ROWS"): 7598 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7599 return None 7600 7601 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7602 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7603 this = self._prev.text.upper() 7604 expression: t.Optional[exp.Expression] = None 7605 expressions = [] 7606 update_options = None 7607 7608 if self._match_text_seq("HISTOGRAM", "ON"): 7609 expressions = self._parse_csv(self._parse_column_reference) 7610 with_expressions = [] 7611 while self._match(TokenType.WITH): 7612 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7613 if self._match_texts(("SYNC", "ASYNC")): 7614 if self._match_text_seq("MODE", advance=False): 7615 with_expressions.append(f"{self._prev.text.upper()} MODE") 7616 self._advance() 7617 else: 7618 buckets = self._parse_number() 7619 if self._match_text_seq("BUCKETS"): 7620 with_expressions.append(f"{buckets} BUCKETS") 7621 if with_expressions: 7622 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7623 7624 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7625 TokenType.UPDATE, advance=False 7626 ): 7627 update_options = self._prev.text.upper() 7628 self._advance() 7629 elif self._match_text_seq("USING", "DATA"): 7630 expression = self.expression(exp.UsingData, this=self._parse_string()) 7631 7632 return self.expression( 7633 exp.AnalyzeHistogram, 7634 this=this, 7635 expressions=expressions, 7636 expression=expression, 7637 update_options=update_options, 7638 ) 7639 7640 def _parse_merge(self) -> exp.Merge: 7641 self._match(TokenType.INTO) 7642 target = self._parse_table() 7643 7644 if target and self._match(TokenType.ALIAS, advance=False): 7645 target.set("alias", self._parse_table_alias()) 7646 7647 self._match(TokenType.USING) 7648 using = self._parse_table() 7649 7650 self._match(TokenType.ON) 7651 on = self._parse_assignment() 7652 7653 return self.expression( 7654 exp.Merge, 7655 this=target, 7656 using=using, 7657 on=on, 7658 whens=self._parse_when_matched(), 7659 returning=self._parse_returning(), 7660 ) 7661 7662 def _parse_when_matched(self) -> exp.Whens: 7663 whens = [] 7664 7665 while self._match(TokenType.WHEN): 7666 matched = not self._match(TokenType.NOT) 7667 self._match_text_seq("MATCHED") 7668 source = ( 7669 False 7670 if self._match_text_seq("BY", "TARGET") 7671 else self._match_text_seq("BY", "SOURCE") 7672 ) 7673 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7674 7675 self._match(TokenType.THEN) 7676 7677 if self._match(TokenType.INSERT): 7678 this = self._parse_star() 7679 if this: 7680 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7681 else: 7682 then = self.expression( 7683 exp.Insert, 7684 this=exp.var("ROW") 7685 if self._match_text_seq("ROW") 7686 else self._parse_value(values=False), 7687 expression=self._match_text_seq("VALUES") and self._parse_value(), 7688 ) 7689 elif self._match(TokenType.UPDATE): 7690 expressions = self._parse_star() 7691 if expressions: 7692 then = self.expression(exp.Update, expressions=expressions) 7693 else: 7694 then = self.expression( 7695 exp.Update, 7696 expressions=self._match(TokenType.SET) 7697 and self._parse_csv(self._parse_equality), 7698 ) 7699 elif self._match(TokenType.DELETE): 7700 then = self.expression(exp.Var, this=self._prev.text) 7701 else: 7702 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7703 7704 whens.append( 7705 self.expression( 7706 exp.When, 7707 matched=matched, 7708 source=source, 7709 condition=condition, 7710 then=then, 7711 ) 7712 ) 7713 return self.expression(exp.Whens, expressions=whens) 7714 7715 def _parse_show(self) -> t.Optional[exp.Expression]: 7716 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7717 if parser: 7718 return parser(self) 7719 return self._parse_as_command(self._prev) 7720 7721 def _parse_set_item_assignment( 7722 self, kind: t.Optional[str] = None 7723 ) -> t.Optional[exp.Expression]: 7724 index = self._index 7725 7726 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7727 return self._parse_set_transaction(global_=kind == "GLOBAL") 7728 7729 left = self._parse_primary() or self._parse_column() 7730 assignment_delimiter = self._match_texts(("=", "TO")) 7731 7732 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7733 self._retreat(index) 7734 return None 7735 7736 right = self._parse_statement() or self._parse_id_var() 7737 if isinstance(right, (exp.Column, exp.Identifier)): 7738 right = exp.var(right.name) 7739 7740 this = self.expression(exp.EQ, this=left, expression=right) 7741 return self.expression(exp.SetItem, this=this, kind=kind) 7742 7743 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7744 self._match_text_seq("TRANSACTION") 7745 characteristics = self._parse_csv( 7746 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7747 ) 7748 return self.expression( 7749 exp.SetItem, 7750 expressions=characteristics, 7751 kind="TRANSACTION", 7752 **{"global": global_}, # type: ignore 7753 ) 7754 7755 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7756 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7757 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7758 7759 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7760 index = self._index 7761 set_ = self.expression( 7762 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7763 ) 7764 7765 if self._curr: 7766 self._retreat(index) 7767 return self._parse_as_command(self._prev) 7768 7769 return set_ 7770 7771 def _parse_var_from_options( 7772 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7773 ) -> t.Optional[exp.Var]: 7774 start = self._curr 7775 if not start: 7776 return None 7777 7778 option = start.text.upper() 7779 continuations = options.get(option) 7780 7781 index = self._index 7782 self._advance() 7783 for keywords in continuations or []: 7784 if isinstance(keywords, str): 7785 keywords = (keywords,) 7786 7787 if self._match_text_seq(*keywords): 7788 option = f"{option} {' '.join(keywords)}" 7789 break 7790 else: 7791 if continuations or continuations is None: 7792 if raise_unmatched: 7793 self.raise_error(f"Unknown option {option}") 7794 7795 self._retreat(index) 7796 return None 7797 7798 return exp.var(option) 7799 7800 def _parse_as_command(self, start: Token) -> exp.Command: 7801 while self._curr: 7802 self._advance() 7803 text = self._find_sql(start, self._prev) 7804 size = len(start.text) 7805 self._warn_unsupported() 7806 return exp.Command(this=text[:size], expression=text[size:]) 7807 7808 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7809 settings = [] 7810 7811 self._match_l_paren() 7812 kind = self._parse_id_var() 7813 7814 if self._match(TokenType.L_PAREN): 7815 while True: 7816 key = self._parse_id_var() 7817 value = self._parse_primary() 7818 if not key and value is None: 7819 break 7820 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7821 self._match(TokenType.R_PAREN) 7822 7823 self._match_r_paren() 7824 7825 return self.expression( 7826 exp.DictProperty, 7827 this=this, 7828 kind=kind.this if kind else None, 7829 settings=settings, 7830 ) 7831 7832 def _parse_dict_range(self, this: str) -> exp.DictRange: 7833 self._match_l_paren() 7834 has_min = self._match_text_seq("MIN") 7835 if has_min: 7836 min = self._parse_var() or self._parse_primary() 7837 self._match_text_seq("MAX") 7838 max = self._parse_var() or self._parse_primary() 7839 else: 7840 max = self._parse_var() or self._parse_primary() 7841 min = exp.Literal.number(0) 7842 self._match_r_paren() 7843 return self.expression(exp.DictRange, this=this, min=min, max=max) 7844 7845 def _parse_comprehension( 7846 self, this: t.Optional[exp.Expression] 7847 ) -> t.Optional[exp.Comprehension]: 7848 index = self._index 7849 expression = self._parse_column() 7850 if not self._match(TokenType.IN): 7851 self._retreat(index - 1) 7852 return None 7853 iterator = self._parse_column() 7854 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7855 return self.expression( 7856 exp.Comprehension, 7857 this=this, 7858 expression=expression, 7859 iterator=iterator, 7860 condition=condition, 7861 ) 7862 7863 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7864 if self._match(TokenType.HEREDOC_STRING): 7865 return self.expression(exp.Heredoc, this=self._prev.text) 7866 7867 if not self._match_text_seq("$"): 7868 return None 7869 7870 tags = ["$"] 7871 tag_text = None 7872 7873 if self._is_connected(): 7874 self._advance() 7875 tags.append(self._prev.text.upper()) 7876 else: 7877 self.raise_error("No closing $ found") 7878 7879 if tags[-1] != "$": 7880 if self._is_connected() and self._match_text_seq("$"): 7881 tag_text = tags[-1] 7882 tags.append("$") 7883 else: 7884 self.raise_error("No closing $ found") 7885 7886 heredoc_start = self._curr 7887 7888 while self._curr: 7889 if self._match_text_seq(*tags, advance=False): 7890 this = self._find_sql(heredoc_start, self._prev) 7891 self._advance(len(tags)) 7892 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7893 7894 self._advance() 7895 7896 self.raise_error(f"No closing {''.join(tags)} found") 7897 return None 7898 7899 def _find_parser( 7900 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7901 ) -> t.Optional[t.Callable]: 7902 if not self._curr: 7903 return None 7904 7905 index = self._index 7906 this = [] 7907 while True: 7908 # The current token might be multiple words 7909 curr = self._curr.text.upper() 7910 key = curr.split(" ") 7911 this.append(curr) 7912 7913 self._advance() 7914 result, trie = in_trie(trie, key) 7915 if result == TrieResult.FAILED: 7916 break 7917 7918 if result == TrieResult.EXISTS: 7919 subparser = parsers[" ".join(this)] 7920 return subparser 7921 7922 self._retreat(index) 7923 return None 7924 7925 def _match(self, token_type, advance=True, expression=None): 7926 if not self._curr: 7927 return None 7928 7929 if self._curr.token_type == token_type: 7930 if advance: 7931 self._advance() 7932 self._add_comments(expression) 7933 return True 7934 7935 return None 7936 7937 def _match_set(self, types, advance=True): 7938 if not self._curr: 7939 return None 7940 7941 if self._curr.token_type in types: 7942 if advance: 7943 self._advance() 7944 return True 7945 7946 return None 7947 7948 def _match_pair(self, token_type_a, token_type_b, advance=True): 7949 if not self._curr or not self._next: 7950 return None 7951 7952 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7953 if advance: 7954 self._advance(2) 7955 return True 7956 7957 return None 7958 7959 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7960 if not self._match(TokenType.L_PAREN, expression=expression): 7961 self.raise_error("Expecting (") 7962 7963 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7964 if not self._match(TokenType.R_PAREN, expression=expression): 7965 self.raise_error("Expecting )") 7966 7967 def _match_texts(self, texts, advance=True): 7968 if ( 7969 self._curr 7970 and self._curr.token_type != TokenType.STRING 7971 and self._curr.text.upper() in texts 7972 ): 7973 if advance: 7974 self._advance() 7975 return True 7976 return None 7977 7978 def _match_text_seq(self, *texts, advance=True): 7979 index = self._index 7980 for text in texts: 7981 if ( 7982 self._curr 7983 and self._curr.token_type != TokenType.STRING 7984 and self._curr.text.upper() == text 7985 ): 7986 self._advance() 7987 else: 7988 self._retreat(index) 7989 return None 7990 7991 if not advance: 7992 self._retreat(index) 7993 7994 return True 7995 7996 def _replace_lambda( 7997 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7998 ) -> t.Optional[exp.Expression]: 7999 if not node: 8000 return node 8001 8002 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8003 8004 for column in node.find_all(exp.Column): 8005 typ = lambda_types.get(column.parts[0].name) 8006 if typ is not None: 8007 dot_or_id = column.to_dot() if column.table else column.this 8008 8009 if typ: 8010 dot_or_id = self.expression( 8011 exp.Cast, 8012 this=dot_or_id, 8013 to=typ, 8014 ) 8015 8016 parent = column.parent 8017 8018 while isinstance(parent, exp.Dot): 8019 if not isinstance(parent.parent, exp.Dot): 8020 parent.replace(dot_or_id) 8021 break 8022 parent = parent.parent 8023 else: 8024 if column is node: 8025 node = dot_or_id 8026 else: 8027 column.replace(dot_or_id) 8028 return node 8029 8030 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8031 start = self._prev 8032 8033 # Not to be confused with TRUNCATE(number, decimals) function call 8034 if self._match(TokenType.L_PAREN): 8035 self._retreat(self._index - 2) 8036 return self._parse_function() 8037 8038 # Clickhouse supports TRUNCATE DATABASE as well 8039 is_database = self._match(TokenType.DATABASE) 8040 8041 self._match(TokenType.TABLE) 8042 8043 exists = self._parse_exists(not_=False) 8044 8045 expressions = self._parse_csv( 8046 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8047 ) 8048 8049 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8050 8051 if self._match_text_seq("RESTART", "IDENTITY"): 8052 identity = "RESTART" 8053 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8054 identity = "CONTINUE" 8055 else: 8056 identity = None 8057 8058 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8059 option = self._prev.text 8060 else: 8061 option = None 8062 8063 partition = self._parse_partition() 8064 8065 # Fallback case 8066 if self._curr: 8067 return self._parse_as_command(start) 8068 8069 return self.expression( 8070 exp.TruncateTable, 8071 expressions=expressions, 8072 is_database=is_database, 8073 exists=exists, 8074 cluster=cluster, 8075 identity=identity, 8076 option=option, 8077 partition=partition, 8078 ) 8079 8080 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8081 this = self._parse_ordered(self._parse_opclass) 8082 8083 if not self._match(TokenType.WITH): 8084 return this 8085 8086 op = self._parse_var(any_token=True) 8087 8088 return self.expression(exp.WithOperator, this=this, op=op) 8089 8090 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8091 self._match(TokenType.EQ) 8092 self._match(TokenType.L_PAREN) 8093 8094 opts: t.List[t.Optional[exp.Expression]] = [] 8095 option: exp.Expression | None 8096 while self._curr and not self._match(TokenType.R_PAREN): 8097 if self._match_text_seq("FORMAT_NAME", "="): 8098 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8099 option = self._parse_format_name() 8100 else: 8101 option = self._parse_property() 8102 8103 if option is None: 8104 self.raise_error("Unable to parse option") 8105 break 8106 8107 opts.append(option) 8108 8109 return opts 8110 8111 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8112 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8113 8114 options = [] 8115 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8116 option = self._parse_var(any_token=True) 8117 prev = self._prev.text.upper() 8118 8119 # Different dialects might separate options and values by white space, "=" and "AS" 8120 self._match(TokenType.EQ) 8121 self._match(TokenType.ALIAS) 8122 8123 param = self.expression(exp.CopyParameter, this=option) 8124 8125 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8126 TokenType.L_PAREN, advance=False 8127 ): 8128 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8129 param.set("expressions", self._parse_wrapped_options()) 8130 elif prev == "FILE_FORMAT": 8131 # T-SQL's external file format case 8132 param.set("expression", self._parse_field()) 8133 else: 8134 param.set("expression", self._parse_unquoted_field()) 8135 8136 options.append(param) 8137 self._match(sep) 8138 8139 return options 8140 8141 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8142 expr = self.expression(exp.Credentials) 8143 8144 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8145 expr.set("storage", self._parse_field()) 8146 if self._match_text_seq("CREDENTIALS"): 8147 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8148 creds = ( 8149 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8150 ) 8151 expr.set("credentials", creds) 8152 if self._match_text_seq("ENCRYPTION"): 8153 expr.set("encryption", self._parse_wrapped_options()) 8154 if self._match_text_seq("IAM_ROLE"): 8155 expr.set("iam_role", self._parse_field()) 8156 if self._match_text_seq("REGION"): 8157 expr.set("region", self._parse_field()) 8158 8159 return expr 8160 8161 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8162 return self._parse_field() 8163 8164 def _parse_copy(self) -> exp.Copy | exp.Command: 8165 start = self._prev 8166 8167 self._match(TokenType.INTO) 8168 8169 this = ( 8170 self._parse_select(nested=True, parse_subquery_alias=False) 8171 if self._match(TokenType.L_PAREN, advance=False) 8172 else self._parse_table(schema=True) 8173 ) 8174 8175 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8176 8177 files = self._parse_csv(self._parse_file_location) 8178 credentials = self._parse_credentials() 8179 8180 self._match_text_seq("WITH") 8181 8182 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8183 8184 # Fallback case 8185 if self._curr: 8186 return self._parse_as_command(start) 8187 8188 return self.expression( 8189 exp.Copy, 8190 this=this, 8191 kind=kind, 8192 credentials=credentials, 8193 files=files, 8194 params=params, 8195 ) 8196 8197 def _parse_normalize(self) -> exp.Normalize: 8198 return self.expression( 8199 exp.Normalize, 8200 this=self._parse_bitwise(), 8201 form=self._match(TokenType.COMMA) and self._parse_var(), 8202 ) 8203 8204 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8205 args = self._parse_csv(lambda: self._parse_lambda()) 8206 8207 this = seq_get(args, 0) 8208 decimals = seq_get(args, 1) 8209 8210 return expr_type( 8211 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8212 ) 8213 8214 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8215 star_token = self._prev 8216 8217 if self._match_text_seq("COLUMNS", "(", advance=False): 8218 this = self._parse_function() 8219 if isinstance(this, exp.Columns): 8220 this.set("unpack", True) 8221 return this 8222 8223 return self.expression( 8224 exp.Star, 8225 **{ # type: ignore 8226 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8227 "replace": self._parse_star_op("REPLACE"), 8228 "rename": self._parse_star_op("RENAME"), 8229 }, 8230 ).update_positions(star_token) 8231 8232 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8233 privilege_parts = [] 8234 8235 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8236 # (end of privilege list) or L_PAREN (start of column list) are met 8237 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8238 privilege_parts.append(self._curr.text.upper()) 8239 self._advance() 8240 8241 this = exp.var(" ".join(privilege_parts)) 8242 expressions = ( 8243 self._parse_wrapped_csv(self._parse_column) 8244 if self._match(TokenType.L_PAREN, advance=False) 8245 else None 8246 ) 8247 8248 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8249 8250 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8251 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8252 principal = self._parse_id_var() 8253 8254 if not principal: 8255 return None 8256 8257 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8258 8259 def _parse_grant(self) -> exp.Grant | exp.Command: 8260 start = self._prev 8261 8262 privileges = self._parse_csv(self._parse_grant_privilege) 8263 8264 self._match(TokenType.ON) 8265 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8266 8267 # Attempt to parse the securable e.g. MySQL allows names 8268 # such as "foo.*", "*.*" which are not easily parseable yet 8269 securable = self._try_parse(self._parse_table_parts) 8270 8271 if not securable or not self._match_text_seq("TO"): 8272 return self._parse_as_command(start) 8273 8274 principals = self._parse_csv(self._parse_grant_principal) 8275 8276 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8277 8278 if self._curr: 8279 return self._parse_as_command(start) 8280 8281 return self.expression( 8282 exp.Grant, 8283 privileges=privileges, 8284 kind=kind, 8285 securable=securable, 8286 principals=principals, 8287 grant_option=grant_option, 8288 ) 8289 8290 def _parse_overlay(self) -> exp.Overlay: 8291 return self.expression( 8292 exp.Overlay, 8293 **{ # type: ignore 8294 "this": self._parse_bitwise(), 8295 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8296 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8297 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8298 }, 8299 ) 8300 8301 def _parse_format_name(self) -> exp.Property: 8302 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8303 # for FILE_FORMAT = <format_name> 8304 return self.expression( 8305 exp.Property, 8306 this=exp.var("FORMAT_NAME"), 8307 value=self._parse_string() or self._parse_table_parts(), 8308 ) 8309 8310 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8311 args: t.List[exp.Expression] = [] 8312 8313 if self._match(TokenType.DISTINCT): 8314 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8315 self._match(TokenType.COMMA) 8316 8317 args.extend(self._parse_csv(self._parse_assignment)) 8318 8319 return self.expression( 8320 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8321 ) 8322 8323 def _identifier_expression( 8324 self, token: t.Optional[Token] = None, **kwargs: t.Any 8325 ) -> exp.Identifier: 8326 token = token or self._prev 8327 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8328 expression.update_positions(token) 8329 return expression 8330 8331 def _build_pipe_cte(self, query: exp.Query, expressions: t.List[exp.Expression]) -> exp.Select: 8332 if not query.selects: 8333 query = query.select("*", copy=False) 8334 8335 self._pipe_cte_counter += 1 8336 new_cte = f"__tmp{self._pipe_cte_counter}" 8337 8338 with_ = query.args.get("with") 8339 ctes = with_.pop() if with_ else None 8340 8341 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8342 if ctes: 8343 new_select.set("with", ctes) 8344 8345 return new_select.with_(new_cte, as_=query, copy=False) 8346 8347 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8348 select = self._parse_select() 8349 if not select: 8350 return query 8351 8352 if not query.selects: 8353 return self._build_pipe_cte(query.select(*select.expressions), [exp.Star()]) 8354 8355 return self._build_pipe_cte(query, select.expressions) 8356 8357 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8358 limit = self._parse_limit() 8359 offset = self._parse_offset() 8360 if limit: 8361 curr_limit = query.args.get("limit", limit) 8362 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8363 query.limit(limit, copy=False) 8364 if offset: 8365 curr_offset = query.args.get("offset") 8366 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8367 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8368 8369 return query 8370 8371 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8372 this = self._parse_assignment() 8373 if self._match_text_seq("GROUP", "AND", advance=False): 8374 return this 8375 8376 this = self._parse_alias(this) 8377 8378 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8379 return self._parse_ordered(lambda: this) 8380 8381 return this 8382 8383 def _parse_pipe_syntax_aggregate_group_order_by( 8384 self, query: exp.Select, group_by_exists: bool = True 8385 ) -> exp.Select: 8386 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8387 aggregates_or_groups, orders = [], [] 8388 for element in expr: 8389 if isinstance(element, exp.Ordered): 8390 this = element.this 8391 if isinstance(this, exp.Alias): 8392 element.set("this", this.args["alias"]) 8393 orders.append(element) 8394 else: 8395 this = element 8396 aggregates_or_groups.append(this) 8397 8398 if group_by_exists: 8399 query = query.select(*aggregates_or_groups, copy=False).group_by( 8400 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8401 copy=False, 8402 ) 8403 else: 8404 query = query.select(*aggregates_or_groups, copy=False) 8405 8406 if orders: 8407 return query.order_by(*orders, append=False, copy=False) 8408 8409 return query 8410 8411 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8412 self._match_text_seq("AGGREGATE") 8413 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8414 8415 if self._match(TokenType.GROUP_BY) or ( 8416 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8417 ): 8418 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8419 8420 return self._build_pipe_cte(query, [exp.Star()]) 8421 8422 def _parse_pipe_syntax_set_operator( 8423 self, query: t.Optional[exp.Query] 8424 ) -> t.Optional[exp.Select]: 8425 first_setop = self.parse_set_operation(this=query) 8426 8427 if not first_setop or not query: 8428 return None 8429 8430 first_setop.this.pop() 8431 distinct = first_setop.args.pop("distinct") 8432 setops = [first_setop.expression.pop(), *self._parse_expressions()] 8433 8434 query = self._build_pipe_cte(query, [exp.Star()]) 8435 with_ = query.args.get("with") 8436 ctes = with_.pop() if with_ else None 8437 8438 if isinstance(first_setop, exp.Union): 8439 query = query.union(*setops, distinct=distinct, copy=False, **first_setop.args) 8440 elif isinstance(first_setop, exp.Except): 8441 query = query.except_(*setops, distinct=distinct, copy=False, **first_setop.args) 8442 else: 8443 query = query.intersect(*setops, distinct=distinct, copy=False, **first_setop.args) 8444 8445 query.set("with", ctes) 8446 8447 return self._build_pipe_cte(query, [exp.Star()]) 8448 8449 def _parse_pipe_syntax_join(self, query: exp.Select) -> t.Optional[exp.Select]: 8450 join = self._parse_join() 8451 if not join: 8452 return None 8453 8454 return query.join(join, copy=False) 8455 8456 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8457 pivots = self._parse_pivots() 8458 if not pivots: 8459 return query 8460 8461 from_ = query.args.get("from") 8462 if from_: 8463 from_.this.set("pivots", pivots) 8464 8465 return self._build_pipe_cte(query, [exp.Star()]) 8466 8467 def _parse_pipe_syntax_query(self, query: exp.Select) -> t.Optional[exp.Select]: 8468 while self._match(TokenType.PIPE_GT): 8469 start = self._curr 8470 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8471 if not parser: 8472 parsed_query = self._parse_pipe_syntax_set_operator( 8473 query 8474 ) or self._parse_pipe_syntax_join(query) 8475 if not parsed_query: 8476 self._retreat(start) 8477 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8478 break 8479 query = parsed_query 8480 else: 8481 query = parser(self, query) 8482 8483 if query and not query.selects: 8484 return query.select("*", copy=False) 8485 8486 return query
28def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 29 if len(args) == 1 and args[0].is_star: 30 return exp.StarMap(this=args[0]) 31 32 keys = [] 33 values = [] 34 for i in range(0, len(args), 2): 35 keys.append(args[i]) 36 values.append(args[i + 1]) 37 38 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
46def binary_range_parser( 47 expr_type: t.Type[exp.Expression], reverse_args: bool = False 48) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 49 def _parse_binary_range( 50 self: Parser, this: t.Optional[exp.Expression] 51 ) -> t.Optional[exp.Expression]: 52 expression = self._parse_bitwise() 53 if reverse_args: 54 this, expression = expression, this 55 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 56 57 return _parse_binary_range
60def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 61 # Default argument order is base, expression 62 this = seq_get(args, 0) 63 expression = seq_get(args, 1) 64 65 if expression: 66 if not dialect.LOG_BASE_FIRST: 67 this, expression = expression, this 68 return exp.Log(this=this, expression=expression) 69 70 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
90def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 91 def _builder(args: t.List, dialect: Dialect) -> E: 92 expression = expr_type( 93 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 94 ) 95 if len(args) > 2 and expr_type is exp.JSONExtract: 96 expression.set("expressions", args[2:]) 97 98 return expression 99 100 return _builder
103def build_mod(args: t.List) -> exp.Mod: 104 this = seq_get(args, 0) 105 expression = seq_get(args, 1) 106 107 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 108 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 109 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 110 111 return exp.Mod(this=this, expression=expression)
123def build_array_constructor( 124 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 125) -> exp.Expression: 126 array_exp = exp_class(expressions=args) 127 128 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 129 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 130 131 return array_exp
134def build_convert_timezone( 135 args: t.List, default_source_tz: t.Optional[str] = None 136) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 137 if len(args) == 2: 138 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 139 return exp.ConvertTimezone( 140 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 141 ) 142 143 return exp.ConvertTimezone.from_arg_list(args)
178class Parser(metaclass=_Parser): 179 """ 180 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 181 182 Args: 183 error_level: The desired error level. 184 Default: ErrorLevel.IMMEDIATE 185 error_message_context: The amount of context to capture from a query string when displaying 186 the error message (in number of characters). 187 Default: 100 188 max_errors: Maximum number of error messages to include in a raised ParseError. 189 This is only relevant if error_level is ErrorLevel.RAISE. 190 Default: 3 191 """ 192 193 FUNCTIONS: t.Dict[str, t.Callable] = { 194 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 195 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 196 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 197 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "CHAR": lambda args: exp.Chr(expressions=args), 204 "CHR": lambda args: exp.Chr(expressions=args), 205 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 206 "CONCAT": lambda args, dialect: exp.Concat( 207 expressions=args, 208 safe=not dialect.STRICT_STRING_CONCAT, 209 coalesce=dialect.CONCAT_COALESCE, 210 ), 211 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 212 expressions=args, 213 safe=not dialect.STRICT_STRING_CONCAT, 214 coalesce=dialect.CONCAT_COALESCE, 215 ), 216 "CONVERT_TIMEZONE": build_convert_timezone, 217 "DATE_TO_DATE_STR": lambda args: exp.Cast( 218 this=seq_get(args, 0), 219 to=exp.DataType(this=exp.DataType.Type.TEXT), 220 ), 221 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 222 start=seq_get(args, 0), 223 end=seq_get(args, 1), 224 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 225 ), 226 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 227 "HEX": build_hex, 228 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 229 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 230 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 231 "LIKE": build_like, 232 "LOG": build_logarithm, 233 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 234 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 235 "LOWER": build_lower, 236 "LPAD": lambda args: build_pad(args), 237 "LEFTPAD": lambda args: build_pad(args), 238 "LTRIM": lambda args: build_trim(args), 239 "MOD": build_mod, 240 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 241 "RPAD": lambda args: build_pad(args, is_left=False), 242 "RTRIM": lambda args: build_trim(args, is_left=False), 243 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 244 if len(args) != 2 245 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 246 "STRPOS": exp.StrPosition.from_arg_list, 247 "CHARINDEX": lambda args: build_locate_strposition(args), 248 "INSTR": exp.StrPosition.from_arg_list, 249 "LOCATE": lambda args: build_locate_strposition(args), 250 "TIME_TO_TIME_STR": lambda args: exp.Cast( 251 this=seq_get(args, 0), 252 to=exp.DataType(this=exp.DataType.Type.TEXT), 253 ), 254 "TO_HEX": build_hex, 255 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 256 this=exp.Cast( 257 this=seq_get(args, 0), 258 to=exp.DataType(this=exp.DataType.Type.TEXT), 259 ), 260 start=exp.Literal.number(1), 261 length=exp.Literal.number(10), 262 ), 263 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 264 "UPPER": build_upper, 265 "VAR_MAP": build_var_map, 266 } 267 268 NO_PAREN_FUNCTIONS = { 269 TokenType.CURRENT_DATE: exp.CurrentDate, 270 TokenType.CURRENT_DATETIME: exp.CurrentDate, 271 TokenType.CURRENT_TIME: exp.CurrentTime, 272 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 273 TokenType.CURRENT_USER: exp.CurrentUser, 274 } 275 276 STRUCT_TYPE_TOKENS = { 277 TokenType.NESTED, 278 TokenType.OBJECT, 279 TokenType.STRUCT, 280 TokenType.UNION, 281 } 282 283 NESTED_TYPE_TOKENS = { 284 TokenType.ARRAY, 285 TokenType.LIST, 286 TokenType.LOWCARDINALITY, 287 TokenType.MAP, 288 TokenType.NULLABLE, 289 TokenType.RANGE, 290 *STRUCT_TYPE_TOKENS, 291 } 292 293 ENUM_TYPE_TOKENS = { 294 TokenType.DYNAMIC, 295 TokenType.ENUM, 296 TokenType.ENUM8, 297 TokenType.ENUM16, 298 } 299 300 AGGREGATE_TYPE_TOKENS = { 301 TokenType.AGGREGATEFUNCTION, 302 TokenType.SIMPLEAGGREGATEFUNCTION, 303 } 304 305 TYPE_TOKENS = { 306 TokenType.BIT, 307 TokenType.BOOLEAN, 308 TokenType.TINYINT, 309 TokenType.UTINYINT, 310 TokenType.SMALLINT, 311 TokenType.USMALLINT, 312 TokenType.INT, 313 TokenType.UINT, 314 TokenType.BIGINT, 315 TokenType.UBIGINT, 316 TokenType.INT128, 317 TokenType.UINT128, 318 TokenType.INT256, 319 TokenType.UINT256, 320 TokenType.MEDIUMINT, 321 TokenType.UMEDIUMINT, 322 TokenType.FIXEDSTRING, 323 TokenType.FLOAT, 324 TokenType.DOUBLE, 325 TokenType.UDOUBLE, 326 TokenType.CHAR, 327 TokenType.NCHAR, 328 TokenType.VARCHAR, 329 TokenType.NVARCHAR, 330 TokenType.BPCHAR, 331 TokenType.TEXT, 332 TokenType.MEDIUMTEXT, 333 TokenType.LONGTEXT, 334 TokenType.BLOB, 335 TokenType.MEDIUMBLOB, 336 TokenType.LONGBLOB, 337 TokenType.BINARY, 338 TokenType.VARBINARY, 339 TokenType.JSON, 340 TokenType.JSONB, 341 TokenType.INTERVAL, 342 TokenType.TINYBLOB, 343 TokenType.TINYTEXT, 344 TokenType.TIME, 345 TokenType.TIMETZ, 346 TokenType.TIMESTAMP, 347 TokenType.TIMESTAMP_S, 348 TokenType.TIMESTAMP_MS, 349 TokenType.TIMESTAMP_NS, 350 TokenType.TIMESTAMPTZ, 351 TokenType.TIMESTAMPLTZ, 352 TokenType.TIMESTAMPNTZ, 353 TokenType.DATETIME, 354 TokenType.DATETIME2, 355 TokenType.DATETIME64, 356 TokenType.SMALLDATETIME, 357 TokenType.DATE, 358 TokenType.DATE32, 359 TokenType.INT4RANGE, 360 TokenType.INT4MULTIRANGE, 361 TokenType.INT8RANGE, 362 TokenType.INT8MULTIRANGE, 363 TokenType.NUMRANGE, 364 TokenType.NUMMULTIRANGE, 365 TokenType.TSRANGE, 366 TokenType.TSMULTIRANGE, 367 TokenType.TSTZRANGE, 368 TokenType.TSTZMULTIRANGE, 369 TokenType.DATERANGE, 370 TokenType.DATEMULTIRANGE, 371 TokenType.DECIMAL, 372 TokenType.DECIMAL32, 373 TokenType.DECIMAL64, 374 TokenType.DECIMAL128, 375 TokenType.DECIMAL256, 376 TokenType.UDECIMAL, 377 TokenType.BIGDECIMAL, 378 TokenType.UUID, 379 TokenType.GEOGRAPHY, 380 TokenType.GEOMETRY, 381 TokenType.POINT, 382 TokenType.RING, 383 TokenType.LINESTRING, 384 TokenType.MULTILINESTRING, 385 TokenType.POLYGON, 386 TokenType.MULTIPOLYGON, 387 TokenType.HLLSKETCH, 388 TokenType.HSTORE, 389 TokenType.PSEUDO_TYPE, 390 TokenType.SUPER, 391 TokenType.SERIAL, 392 TokenType.SMALLSERIAL, 393 TokenType.BIGSERIAL, 394 TokenType.XML, 395 TokenType.YEAR, 396 TokenType.USERDEFINED, 397 TokenType.MONEY, 398 TokenType.SMALLMONEY, 399 TokenType.ROWVERSION, 400 TokenType.IMAGE, 401 TokenType.VARIANT, 402 TokenType.VECTOR, 403 TokenType.VOID, 404 TokenType.OBJECT, 405 TokenType.OBJECT_IDENTIFIER, 406 TokenType.INET, 407 TokenType.IPADDRESS, 408 TokenType.IPPREFIX, 409 TokenType.IPV4, 410 TokenType.IPV6, 411 TokenType.UNKNOWN, 412 TokenType.NOTHING, 413 TokenType.NULL, 414 TokenType.NAME, 415 TokenType.TDIGEST, 416 TokenType.DYNAMIC, 417 *ENUM_TYPE_TOKENS, 418 *NESTED_TYPE_TOKENS, 419 *AGGREGATE_TYPE_TOKENS, 420 } 421 422 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 423 TokenType.BIGINT: TokenType.UBIGINT, 424 TokenType.INT: TokenType.UINT, 425 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 426 TokenType.SMALLINT: TokenType.USMALLINT, 427 TokenType.TINYINT: TokenType.UTINYINT, 428 TokenType.DECIMAL: TokenType.UDECIMAL, 429 TokenType.DOUBLE: TokenType.UDOUBLE, 430 } 431 432 SUBQUERY_PREDICATES = { 433 TokenType.ANY: exp.Any, 434 TokenType.ALL: exp.All, 435 TokenType.EXISTS: exp.Exists, 436 TokenType.SOME: exp.Any, 437 } 438 439 RESERVED_TOKENS = { 440 *Tokenizer.SINGLE_TOKENS.values(), 441 TokenType.SELECT, 442 } - {TokenType.IDENTIFIER} 443 444 DB_CREATABLES = { 445 TokenType.DATABASE, 446 TokenType.DICTIONARY, 447 TokenType.FILE_FORMAT, 448 TokenType.MODEL, 449 TokenType.NAMESPACE, 450 TokenType.SCHEMA, 451 TokenType.SEQUENCE, 452 TokenType.SINK, 453 TokenType.SOURCE, 454 TokenType.STAGE, 455 TokenType.STORAGE_INTEGRATION, 456 TokenType.STREAMLIT, 457 TokenType.TABLE, 458 TokenType.TAG, 459 TokenType.VIEW, 460 TokenType.WAREHOUSE, 461 } 462 463 CREATABLES = { 464 TokenType.COLUMN, 465 TokenType.CONSTRAINT, 466 TokenType.FOREIGN_KEY, 467 TokenType.FUNCTION, 468 TokenType.INDEX, 469 TokenType.PROCEDURE, 470 *DB_CREATABLES, 471 } 472 473 ALTERABLES = { 474 TokenType.INDEX, 475 TokenType.TABLE, 476 TokenType.VIEW, 477 } 478 479 # Tokens that can represent identifiers 480 ID_VAR_TOKENS = { 481 TokenType.ALL, 482 TokenType.ATTACH, 483 TokenType.VAR, 484 TokenType.ANTI, 485 TokenType.APPLY, 486 TokenType.ASC, 487 TokenType.ASOF, 488 TokenType.AUTO_INCREMENT, 489 TokenType.BEGIN, 490 TokenType.BPCHAR, 491 TokenType.CACHE, 492 TokenType.CASE, 493 TokenType.COLLATE, 494 TokenType.COMMAND, 495 TokenType.COMMENT, 496 TokenType.COMMIT, 497 TokenType.CONSTRAINT, 498 TokenType.COPY, 499 TokenType.CUBE, 500 TokenType.CURRENT_SCHEMA, 501 TokenType.DEFAULT, 502 TokenType.DELETE, 503 TokenType.DESC, 504 TokenType.DESCRIBE, 505 TokenType.DETACH, 506 TokenType.DICTIONARY, 507 TokenType.DIV, 508 TokenType.END, 509 TokenType.EXECUTE, 510 TokenType.EXPORT, 511 TokenType.ESCAPE, 512 TokenType.FALSE, 513 TokenType.FIRST, 514 TokenType.FILTER, 515 TokenType.FINAL, 516 TokenType.FORMAT, 517 TokenType.FULL, 518 TokenType.GET, 519 TokenType.IDENTIFIER, 520 TokenType.IS, 521 TokenType.ISNULL, 522 TokenType.INTERVAL, 523 TokenType.KEEP, 524 TokenType.KILL, 525 TokenType.LEFT, 526 TokenType.LIMIT, 527 TokenType.LOAD, 528 TokenType.MERGE, 529 TokenType.NATURAL, 530 TokenType.NEXT, 531 TokenType.OFFSET, 532 TokenType.OPERATOR, 533 TokenType.ORDINALITY, 534 TokenType.OVERLAPS, 535 TokenType.OVERWRITE, 536 TokenType.PARTITION, 537 TokenType.PERCENT, 538 TokenType.PIVOT, 539 TokenType.PRAGMA, 540 TokenType.PUT, 541 TokenType.RANGE, 542 TokenType.RECURSIVE, 543 TokenType.REFERENCES, 544 TokenType.REFRESH, 545 TokenType.RENAME, 546 TokenType.REPLACE, 547 TokenType.RIGHT, 548 TokenType.ROLLUP, 549 TokenType.ROW, 550 TokenType.ROWS, 551 TokenType.SEMI, 552 TokenType.SET, 553 TokenType.SETTINGS, 554 TokenType.SHOW, 555 TokenType.TEMPORARY, 556 TokenType.TOP, 557 TokenType.TRUE, 558 TokenType.TRUNCATE, 559 TokenType.UNIQUE, 560 TokenType.UNNEST, 561 TokenType.UNPIVOT, 562 TokenType.UPDATE, 563 TokenType.USE, 564 TokenType.VOLATILE, 565 TokenType.WINDOW, 566 *CREATABLES, 567 *SUBQUERY_PREDICATES, 568 *TYPE_TOKENS, 569 *NO_PAREN_FUNCTIONS, 570 } 571 ID_VAR_TOKENS.remove(TokenType.UNION) 572 573 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 574 TokenType.ANTI, 575 TokenType.APPLY, 576 TokenType.ASOF, 577 TokenType.FULL, 578 TokenType.LEFT, 579 TokenType.LOCK, 580 TokenType.NATURAL, 581 TokenType.RIGHT, 582 TokenType.SEMI, 583 TokenType.WINDOW, 584 } 585 586 ALIAS_TOKENS = ID_VAR_TOKENS 587 588 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 589 590 ARRAY_CONSTRUCTORS = { 591 "ARRAY": exp.Array, 592 "LIST": exp.List, 593 } 594 595 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 596 597 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 598 599 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 600 601 FUNC_TOKENS = { 602 TokenType.COLLATE, 603 TokenType.COMMAND, 604 TokenType.CURRENT_DATE, 605 TokenType.CURRENT_DATETIME, 606 TokenType.CURRENT_SCHEMA, 607 TokenType.CURRENT_TIMESTAMP, 608 TokenType.CURRENT_TIME, 609 TokenType.CURRENT_USER, 610 TokenType.FILTER, 611 TokenType.FIRST, 612 TokenType.FORMAT, 613 TokenType.GET, 614 TokenType.GLOB, 615 TokenType.IDENTIFIER, 616 TokenType.INDEX, 617 TokenType.ISNULL, 618 TokenType.ILIKE, 619 TokenType.INSERT, 620 TokenType.LIKE, 621 TokenType.MERGE, 622 TokenType.NEXT, 623 TokenType.OFFSET, 624 TokenType.PRIMARY_KEY, 625 TokenType.RANGE, 626 TokenType.REPLACE, 627 TokenType.RLIKE, 628 TokenType.ROW, 629 TokenType.UNNEST, 630 TokenType.VAR, 631 TokenType.LEFT, 632 TokenType.RIGHT, 633 TokenType.SEQUENCE, 634 TokenType.DATE, 635 TokenType.DATETIME, 636 TokenType.TABLE, 637 TokenType.TIMESTAMP, 638 TokenType.TIMESTAMPTZ, 639 TokenType.TRUNCATE, 640 TokenType.WINDOW, 641 TokenType.XOR, 642 *TYPE_TOKENS, 643 *SUBQUERY_PREDICATES, 644 } 645 646 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.AND: exp.And, 648 } 649 650 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.COLON_EQ: exp.PropertyEQ, 652 } 653 654 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.OR: exp.Or, 656 } 657 658 EQUALITY = { 659 TokenType.EQ: exp.EQ, 660 TokenType.NEQ: exp.NEQ, 661 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 662 } 663 664 COMPARISON = { 665 TokenType.GT: exp.GT, 666 TokenType.GTE: exp.GTE, 667 TokenType.LT: exp.LT, 668 TokenType.LTE: exp.LTE, 669 } 670 671 BITWISE = { 672 TokenType.AMP: exp.BitwiseAnd, 673 TokenType.CARET: exp.BitwiseXor, 674 TokenType.PIPE: exp.BitwiseOr, 675 } 676 677 TERM = { 678 TokenType.DASH: exp.Sub, 679 TokenType.PLUS: exp.Add, 680 TokenType.MOD: exp.Mod, 681 TokenType.COLLATE: exp.Collate, 682 } 683 684 FACTOR = { 685 TokenType.DIV: exp.IntDiv, 686 TokenType.LR_ARROW: exp.Distance, 687 TokenType.SLASH: exp.Div, 688 TokenType.STAR: exp.Mul, 689 } 690 691 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 692 693 TIMES = { 694 TokenType.TIME, 695 TokenType.TIMETZ, 696 } 697 698 TIMESTAMPS = { 699 TokenType.TIMESTAMP, 700 TokenType.TIMESTAMPNTZ, 701 TokenType.TIMESTAMPTZ, 702 TokenType.TIMESTAMPLTZ, 703 *TIMES, 704 } 705 706 SET_OPERATIONS = { 707 TokenType.UNION, 708 TokenType.INTERSECT, 709 TokenType.EXCEPT, 710 } 711 712 JOIN_METHODS = { 713 TokenType.ASOF, 714 TokenType.NATURAL, 715 TokenType.POSITIONAL, 716 } 717 718 JOIN_SIDES = { 719 TokenType.LEFT, 720 TokenType.RIGHT, 721 TokenType.FULL, 722 } 723 724 JOIN_KINDS = { 725 TokenType.ANTI, 726 TokenType.CROSS, 727 TokenType.INNER, 728 TokenType.OUTER, 729 TokenType.SEMI, 730 TokenType.STRAIGHT_JOIN, 731 } 732 733 JOIN_HINTS: t.Set[str] = set() 734 735 LAMBDAS = { 736 TokenType.ARROW: lambda self, expressions: self.expression( 737 exp.Lambda, 738 this=self._replace_lambda( 739 self._parse_assignment(), 740 expressions, 741 ), 742 expressions=expressions, 743 ), 744 TokenType.FARROW: lambda self, expressions: self.expression( 745 exp.Kwarg, 746 this=exp.var(expressions[0].name), 747 expression=self._parse_assignment(), 748 ), 749 } 750 751 COLUMN_OPERATORS = { 752 TokenType.DOT: None, 753 TokenType.DOTCOLON: lambda self, this, to: self.expression( 754 exp.JSONCast, 755 this=this, 756 to=to, 757 ), 758 TokenType.DCOLON: lambda self, this, to: self.expression( 759 exp.Cast if self.STRICT_CAST else exp.TryCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.ARROW: lambda self, this, path: self.expression( 764 exp.JSONExtract, 765 this=this, 766 expression=self.dialect.to_json_path(path), 767 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 768 ), 769 TokenType.DARROW: lambda self, this, path: self.expression( 770 exp.JSONExtractScalar, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 776 exp.JSONBExtract, 777 this=this, 778 expression=path, 779 ), 780 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 781 exp.JSONBExtractScalar, 782 this=this, 783 expression=path, 784 ), 785 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 786 exp.JSONBContains, 787 this=this, 788 expression=key, 789 ), 790 } 791 792 EXPRESSION_PARSERS = { 793 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 794 exp.Column: lambda self: self._parse_column(), 795 exp.Condition: lambda self: self._parse_assignment(), 796 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 797 exp.Expression: lambda self: self._parse_expression(), 798 exp.From: lambda self: self._parse_from(joins=True), 799 exp.Group: lambda self: self._parse_group(), 800 exp.Having: lambda self: self._parse_having(), 801 exp.Hint: lambda self: self._parse_hint_body(), 802 exp.Identifier: lambda self: self._parse_id_var(), 803 exp.Join: lambda self: self._parse_join(), 804 exp.Lambda: lambda self: self._parse_lambda(), 805 exp.Lateral: lambda self: self._parse_lateral(), 806 exp.Limit: lambda self: self._parse_limit(), 807 exp.Offset: lambda self: self._parse_offset(), 808 exp.Order: lambda self: self._parse_order(), 809 exp.Ordered: lambda self: self._parse_ordered(), 810 exp.Properties: lambda self: self._parse_properties(), 811 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 812 exp.Qualify: lambda self: self._parse_qualify(), 813 exp.Returning: lambda self: self._parse_returning(), 814 exp.Select: lambda self: self._parse_select(), 815 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 816 exp.Table: lambda self: self._parse_table_parts(), 817 exp.TableAlias: lambda self: self._parse_table_alias(), 818 exp.Tuple: lambda self: self._parse_value(values=False), 819 exp.Whens: lambda self: self._parse_when_matched(), 820 exp.Where: lambda self: self._parse_where(), 821 exp.Window: lambda self: self._parse_named_window(), 822 exp.With: lambda self: self._parse_with(), 823 "JOIN_TYPE": lambda self: self._parse_join_parts(), 824 } 825 826 STATEMENT_PARSERS = { 827 TokenType.ALTER: lambda self: self._parse_alter(), 828 TokenType.ANALYZE: lambda self: self._parse_analyze(), 829 TokenType.BEGIN: lambda self: self._parse_transaction(), 830 TokenType.CACHE: lambda self: self._parse_cache(), 831 TokenType.COMMENT: lambda self: self._parse_comment(), 832 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 833 TokenType.COPY: lambda self: self._parse_copy(), 834 TokenType.CREATE: lambda self: self._parse_create(), 835 TokenType.DELETE: lambda self: self._parse_delete(), 836 TokenType.DESC: lambda self: self._parse_describe(), 837 TokenType.DESCRIBE: lambda self: self._parse_describe(), 838 TokenType.DROP: lambda self: self._parse_drop(), 839 TokenType.GRANT: lambda self: self._parse_grant(), 840 TokenType.INSERT: lambda self: self._parse_insert(), 841 TokenType.KILL: lambda self: self._parse_kill(), 842 TokenType.LOAD: lambda self: self._parse_load(), 843 TokenType.MERGE: lambda self: self._parse_merge(), 844 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 845 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 846 TokenType.REFRESH: lambda self: self._parse_refresh(), 847 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 848 TokenType.SET: lambda self: self._parse_set(), 849 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 850 TokenType.UNCACHE: lambda self: self._parse_uncache(), 851 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 852 TokenType.UPDATE: lambda self: self._parse_update(), 853 TokenType.USE: lambda self: self._parse_use(), 854 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 855 } 856 857 UNARY_PARSERS = { 858 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 859 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 860 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 861 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 862 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 863 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 864 } 865 866 STRING_PARSERS = { 867 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 868 exp.RawString, this=token.text 869 ), 870 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 871 exp.National, this=token.text 872 ), 873 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 874 TokenType.STRING: lambda self, token: self.expression( 875 exp.Literal, this=token.text, is_string=True 876 ), 877 TokenType.UNICODE_STRING: lambda self, token: self.expression( 878 exp.UnicodeString, 879 this=token.text, 880 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 881 ), 882 } 883 884 NUMERIC_PARSERS = { 885 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 886 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 887 TokenType.HEX_STRING: lambda self, token: self.expression( 888 exp.HexString, 889 this=token.text, 890 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 891 ), 892 TokenType.NUMBER: lambda self, token: self.expression( 893 exp.Literal, this=token.text, is_string=False 894 ), 895 } 896 897 PRIMARY_PARSERS = { 898 **STRING_PARSERS, 899 **NUMERIC_PARSERS, 900 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 901 TokenType.NULL: lambda self, _: self.expression(exp.Null), 902 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 903 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 904 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 905 TokenType.STAR: lambda self, _: self._parse_star_ops(), 906 } 907 908 PLACEHOLDER_PARSERS = { 909 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 910 TokenType.PARAMETER: lambda self: self._parse_parameter(), 911 TokenType.COLON: lambda self: ( 912 self.expression(exp.Placeholder, this=self._prev.text) 913 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 914 else None 915 ), 916 } 917 918 RANGE_PARSERS = { 919 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 920 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 921 TokenType.GLOB: binary_range_parser(exp.Glob), 922 TokenType.ILIKE: binary_range_parser(exp.ILike), 923 TokenType.IN: lambda self, this: self._parse_in(this), 924 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 925 TokenType.IS: lambda self, this: self._parse_is(this), 926 TokenType.LIKE: binary_range_parser(exp.Like), 927 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 928 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 929 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 930 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 931 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 932 } 933 934 PIPE_SYNTAX_TRANSFORM_PARSERS = { 935 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 936 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 937 "ORDER BY": lambda self, query: query.order_by( 938 self._parse_order(), append=False, copy=False 939 ), 940 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 941 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 942 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 943 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 944 } 945 946 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 947 "ALLOWED_VALUES": lambda self: self.expression( 948 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 949 ), 950 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 951 "AUTO": lambda self: self._parse_auto_property(), 952 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 953 "BACKUP": lambda self: self.expression( 954 exp.BackupProperty, this=self._parse_var(any_token=True) 955 ), 956 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 957 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 958 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 959 "CHECKSUM": lambda self: self._parse_checksum(), 960 "CLUSTER BY": lambda self: self._parse_cluster(), 961 "CLUSTERED": lambda self: self._parse_clustered_by(), 962 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 963 exp.CollateProperty, **kwargs 964 ), 965 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 966 "CONTAINS": lambda self: self._parse_contains_property(), 967 "COPY": lambda self: self._parse_copy_property(), 968 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 969 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 970 "DEFINER": lambda self: self._parse_definer(), 971 "DETERMINISTIC": lambda self: self.expression( 972 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 973 ), 974 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 975 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 976 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 977 "DISTKEY": lambda self: self._parse_distkey(), 978 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 979 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 980 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 981 "ENVIRONMENT": lambda self: self.expression( 982 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 983 ), 984 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 985 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 986 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 987 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 988 "FREESPACE": lambda self: self._parse_freespace(), 989 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 990 "HEAP": lambda self: self.expression(exp.HeapProperty), 991 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 992 "IMMUTABLE": lambda self: self.expression( 993 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 994 ), 995 "INHERITS": lambda self: self.expression( 996 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 997 ), 998 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 999 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1000 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1001 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1002 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1003 "LIKE": lambda self: self._parse_create_like(), 1004 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1005 "LOCK": lambda self: self._parse_locking(), 1006 "LOCKING": lambda self: self._parse_locking(), 1007 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1008 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1009 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1010 "MODIFIES": lambda self: self._parse_modifies_property(), 1011 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1012 "NO": lambda self: self._parse_no_property(), 1013 "ON": lambda self: self._parse_on_property(), 1014 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1015 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1016 "PARTITION": lambda self: self._parse_partitioned_of(), 1017 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1018 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1019 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1020 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1021 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1022 "READS": lambda self: self._parse_reads_property(), 1023 "REMOTE": lambda self: self._parse_remote_with_connection(), 1024 "RETURNS": lambda self: self._parse_returns(), 1025 "STRICT": lambda self: self.expression(exp.StrictProperty), 1026 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1027 "ROW": lambda self: self._parse_row(), 1028 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1029 "SAMPLE": lambda self: self.expression( 1030 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1031 ), 1032 "SECURE": lambda self: self.expression(exp.SecureProperty), 1033 "SECURITY": lambda self: self._parse_security(), 1034 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1035 "SETTINGS": lambda self: self._parse_settings_property(), 1036 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1037 "SORTKEY": lambda self: self._parse_sortkey(), 1038 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1039 "STABLE": lambda self: self.expression( 1040 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1041 ), 1042 "STORED": lambda self: self._parse_stored(), 1043 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1044 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1045 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1046 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1047 "TO": lambda self: self._parse_to_table(), 1048 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1049 "TRANSFORM": lambda self: self.expression( 1050 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1051 ), 1052 "TTL": lambda self: self._parse_ttl(), 1053 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1054 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1055 "VOLATILE": lambda self: self._parse_volatile_property(), 1056 "WITH": lambda self: self._parse_with_property(), 1057 } 1058 1059 CONSTRAINT_PARSERS = { 1060 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1061 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1062 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1063 "CHARACTER SET": lambda self: self.expression( 1064 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1065 ), 1066 "CHECK": lambda self: self.expression( 1067 exp.CheckColumnConstraint, 1068 this=self._parse_wrapped(self._parse_assignment), 1069 enforced=self._match_text_seq("ENFORCED"), 1070 ), 1071 "COLLATE": lambda self: self.expression( 1072 exp.CollateColumnConstraint, 1073 this=self._parse_identifier() or self._parse_column(), 1074 ), 1075 "COMMENT": lambda self: self.expression( 1076 exp.CommentColumnConstraint, this=self._parse_string() 1077 ), 1078 "COMPRESS": lambda self: self._parse_compress(), 1079 "CLUSTERED": lambda self: self.expression( 1080 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1081 ), 1082 "NONCLUSTERED": lambda self: self.expression( 1083 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1084 ), 1085 "DEFAULT": lambda self: self.expression( 1086 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1087 ), 1088 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1089 "EPHEMERAL": lambda self: self.expression( 1090 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1091 ), 1092 "EXCLUDE": lambda self: self.expression( 1093 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1094 ), 1095 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1096 "FORMAT": lambda self: self.expression( 1097 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1098 ), 1099 "GENERATED": lambda self: self._parse_generated_as_identity(), 1100 "IDENTITY": lambda self: self._parse_auto_increment(), 1101 "INLINE": lambda self: self._parse_inline(), 1102 "LIKE": lambda self: self._parse_create_like(), 1103 "NOT": lambda self: self._parse_not_constraint(), 1104 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1105 "ON": lambda self: ( 1106 self._match(TokenType.UPDATE) 1107 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1108 ) 1109 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1110 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1111 "PERIOD": lambda self: self._parse_period_for_system_time(), 1112 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1113 "REFERENCES": lambda self: self._parse_references(match=False), 1114 "TITLE": lambda self: self.expression( 1115 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1116 ), 1117 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1118 "UNIQUE": lambda self: self._parse_unique(), 1119 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1120 "WATERMARK": lambda self: self.expression( 1121 exp.WatermarkColumnConstraint, 1122 this=self._match(TokenType.FOR) and self._parse_column(), 1123 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1124 ), 1125 "WITH": lambda self: self.expression( 1126 exp.Properties, expressions=self._parse_wrapped_properties() 1127 ), 1128 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1129 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1130 } 1131 1132 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1133 klass = ( 1134 exp.PartitionedByBucket 1135 if self._prev.text.upper() == "BUCKET" 1136 else exp.PartitionByTruncate 1137 ) 1138 1139 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1140 this, expression = seq_get(args, 0), seq_get(args, 1) 1141 1142 if isinstance(this, exp.Literal): 1143 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1144 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1145 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1146 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1147 # 1148 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1149 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1150 this, expression = expression, this 1151 1152 return self.expression(klass, this=this, expression=expression) 1153 1154 ALTER_PARSERS = { 1155 "ADD": lambda self: self._parse_alter_table_add(), 1156 "AS": lambda self: self._parse_select(), 1157 "ALTER": lambda self: self._parse_alter_table_alter(), 1158 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1159 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1160 "DROP": lambda self: self._parse_alter_table_drop(), 1161 "RENAME": lambda self: self._parse_alter_table_rename(), 1162 "SET": lambda self: self._parse_alter_table_set(), 1163 "SWAP": lambda self: self.expression( 1164 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1165 ), 1166 } 1167 1168 ALTER_ALTER_PARSERS = { 1169 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1170 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1171 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1172 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1173 } 1174 1175 SCHEMA_UNNAMED_CONSTRAINTS = { 1176 "CHECK", 1177 "EXCLUDE", 1178 "FOREIGN KEY", 1179 "LIKE", 1180 "PERIOD", 1181 "PRIMARY KEY", 1182 "UNIQUE", 1183 "WATERMARK", 1184 "BUCKET", 1185 "TRUNCATE", 1186 } 1187 1188 NO_PAREN_FUNCTION_PARSERS = { 1189 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1190 "CASE": lambda self: self._parse_case(), 1191 "CONNECT_BY_ROOT": lambda self: self.expression( 1192 exp.ConnectByRoot, this=self._parse_column() 1193 ), 1194 "IF": lambda self: self._parse_if(), 1195 } 1196 1197 INVALID_FUNC_NAME_TOKENS = { 1198 TokenType.IDENTIFIER, 1199 TokenType.STRING, 1200 } 1201 1202 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1203 1204 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1205 1206 FUNCTION_PARSERS = { 1207 **{ 1208 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1209 }, 1210 **{ 1211 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1212 }, 1213 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1214 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1215 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1216 "DECODE": lambda self: self._parse_decode(), 1217 "EXTRACT": lambda self: self._parse_extract(), 1218 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1219 "GAP_FILL": lambda self: self._parse_gap_fill(), 1220 "JSON_OBJECT": lambda self: self._parse_json_object(), 1221 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1222 "JSON_TABLE": lambda self: self._parse_json_table(), 1223 "MATCH": lambda self: self._parse_match_against(), 1224 "NORMALIZE": lambda self: self._parse_normalize(), 1225 "OPENJSON": lambda self: self._parse_open_json(), 1226 "OVERLAY": lambda self: self._parse_overlay(), 1227 "POSITION": lambda self: self._parse_position(), 1228 "PREDICT": lambda self: self._parse_predict(), 1229 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1230 "STRING_AGG": lambda self: self._parse_string_agg(), 1231 "SUBSTRING": lambda self: self._parse_substring(), 1232 "TRIM": lambda self: self._parse_trim(), 1233 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1234 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1235 "XMLELEMENT": lambda self: self.expression( 1236 exp.XMLElement, 1237 this=self._match_text_seq("NAME") and self._parse_id_var(), 1238 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1239 ), 1240 "XMLTABLE": lambda self: self._parse_xml_table(), 1241 } 1242 1243 QUERY_MODIFIER_PARSERS = { 1244 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1245 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1246 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1247 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1248 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1249 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1250 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1251 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1252 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1253 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1254 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1255 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1256 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1257 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1258 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1259 TokenType.CLUSTER_BY: lambda self: ( 1260 "cluster", 1261 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1262 ), 1263 TokenType.DISTRIBUTE_BY: lambda self: ( 1264 "distribute", 1265 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1266 ), 1267 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1268 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1269 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1270 } 1271 1272 SET_PARSERS = { 1273 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1274 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1275 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1276 "TRANSACTION": lambda self: self._parse_set_transaction(), 1277 } 1278 1279 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1280 1281 TYPE_LITERAL_PARSERS = { 1282 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1283 } 1284 1285 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1286 1287 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1288 1289 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1290 1291 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1292 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1293 "ISOLATION": ( 1294 ("LEVEL", "REPEATABLE", "READ"), 1295 ("LEVEL", "READ", "COMMITTED"), 1296 ("LEVEL", "READ", "UNCOMITTED"), 1297 ("LEVEL", "SERIALIZABLE"), 1298 ), 1299 "READ": ("WRITE", "ONLY"), 1300 } 1301 1302 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1303 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1304 ) 1305 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1306 1307 CREATE_SEQUENCE: OPTIONS_TYPE = { 1308 "SCALE": ("EXTEND", "NOEXTEND"), 1309 "SHARD": ("EXTEND", "NOEXTEND"), 1310 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1311 **dict.fromkeys( 1312 ( 1313 "SESSION", 1314 "GLOBAL", 1315 "KEEP", 1316 "NOKEEP", 1317 "ORDER", 1318 "NOORDER", 1319 "NOCACHE", 1320 "CYCLE", 1321 "NOCYCLE", 1322 "NOMINVALUE", 1323 "NOMAXVALUE", 1324 "NOSCALE", 1325 "NOSHARD", 1326 ), 1327 tuple(), 1328 ), 1329 } 1330 1331 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1332 1333 USABLES: OPTIONS_TYPE = dict.fromkeys( 1334 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1335 ) 1336 1337 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1338 1339 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1340 "TYPE": ("EVOLUTION",), 1341 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1342 } 1343 1344 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1345 1346 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1347 1348 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1349 "NOT": ("ENFORCED",), 1350 "MATCH": ( 1351 "FULL", 1352 "PARTIAL", 1353 "SIMPLE", 1354 ), 1355 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1356 "USING": ( 1357 "BTREE", 1358 "HASH", 1359 ), 1360 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1361 } 1362 1363 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1364 "NO": ("OTHERS",), 1365 "CURRENT": ("ROW",), 1366 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1367 } 1368 1369 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1370 1371 CLONE_KEYWORDS = {"CLONE", "COPY"} 1372 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1373 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1374 1375 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1376 1377 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1378 1379 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1380 1381 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1382 1383 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1384 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1385 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1386 1387 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1388 1389 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1390 1391 ADD_CONSTRAINT_TOKENS = { 1392 TokenType.CONSTRAINT, 1393 TokenType.FOREIGN_KEY, 1394 TokenType.INDEX, 1395 TokenType.KEY, 1396 TokenType.PRIMARY_KEY, 1397 TokenType.UNIQUE, 1398 } 1399 1400 DISTINCT_TOKENS = {TokenType.DISTINCT} 1401 1402 NULL_TOKENS = {TokenType.NULL} 1403 1404 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1405 1406 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1407 1408 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1409 1410 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1411 1412 ODBC_DATETIME_LITERALS = { 1413 "d": exp.Date, 1414 "t": exp.Time, 1415 "ts": exp.Timestamp, 1416 } 1417 1418 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1419 1420 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1421 1422 # The style options for the DESCRIBE statement 1423 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1424 1425 # The style options for the ANALYZE statement 1426 ANALYZE_STYLES = { 1427 "BUFFER_USAGE_LIMIT", 1428 "FULL", 1429 "LOCAL", 1430 "NO_WRITE_TO_BINLOG", 1431 "SAMPLE", 1432 "SKIP_LOCKED", 1433 "VERBOSE", 1434 } 1435 1436 ANALYZE_EXPRESSION_PARSERS = { 1437 "ALL": lambda self: self._parse_analyze_columns(), 1438 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1439 "DELETE": lambda self: self._parse_analyze_delete(), 1440 "DROP": lambda self: self._parse_analyze_histogram(), 1441 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1442 "LIST": lambda self: self._parse_analyze_list(), 1443 "PREDICATE": lambda self: self._parse_analyze_columns(), 1444 "UPDATE": lambda self: self._parse_analyze_histogram(), 1445 "VALIDATE": lambda self: self._parse_analyze_validate(), 1446 } 1447 1448 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1449 1450 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1451 1452 OPERATION_MODIFIERS: t.Set[str] = set() 1453 1454 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1455 1456 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1457 1458 STRICT_CAST = True 1459 1460 PREFIXED_PIVOT_COLUMNS = False 1461 IDENTIFY_PIVOT_STRINGS = False 1462 1463 LOG_DEFAULTS_TO_LN = False 1464 1465 # Whether the table sample clause expects CSV syntax 1466 TABLESAMPLE_CSV = False 1467 1468 # The default method used for table sampling 1469 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1470 1471 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1472 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1473 1474 # Whether the TRIM function expects the characters to trim as its first argument 1475 TRIM_PATTERN_FIRST = False 1476 1477 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1478 STRING_ALIASES = False 1479 1480 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1481 MODIFIERS_ATTACHED_TO_SET_OP = True 1482 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1483 1484 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1485 NO_PAREN_IF_COMMANDS = True 1486 1487 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1488 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1489 1490 # Whether the `:` operator is used to extract a value from a VARIANT column 1491 COLON_IS_VARIANT_EXTRACT = False 1492 1493 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1494 # If this is True and '(' is not found, the keyword will be treated as an identifier 1495 VALUES_FOLLOWED_BY_PAREN = True 1496 1497 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1498 SUPPORTS_IMPLICIT_UNNEST = False 1499 1500 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1501 INTERVAL_SPANS = True 1502 1503 # Whether a PARTITION clause can follow a table reference 1504 SUPPORTS_PARTITION_SELECTION = False 1505 1506 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1507 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1508 1509 # Whether the 'AS' keyword is optional in the CTE definition syntax 1510 OPTIONAL_ALIAS_TOKEN_CTE = True 1511 1512 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1513 ALTER_RENAME_REQUIRES_COLUMN = True 1514 1515 __slots__ = ( 1516 "error_level", 1517 "error_message_context", 1518 "max_errors", 1519 "dialect", 1520 "sql", 1521 "errors", 1522 "_tokens", 1523 "_index", 1524 "_curr", 1525 "_next", 1526 "_prev", 1527 "_prev_comments", 1528 "_pipe_cte_counter", 1529 ) 1530 1531 # Autofilled 1532 SHOW_TRIE: t.Dict = {} 1533 SET_TRIE: t.Dict = {} 1534 1535 def __init__( 1536 self, 1537 error_level: t.Optional[ErrorLevel] = None, 1538 error_message_context: int = 100, 1539 max_errors: int = 3, 1540 dialect: DialectType = None, 1541 ): 1542 from sqlglot.dialects import Dialect 1543 1544 self.error_level = error_level or ErrorLevel.IMMEDIATE 1545 self.error_message_context = error_message_context 1546 self.max_errors = max_errors 1547 self.dialect = Dialect.get_or_raise(dialect) 1548 self.reset() 1549 1550 def reset(self): 1551 self.sql = "" 1552 self.errors = [] 1553 self._tokens = [] 1554 self._index = 0 1555 self._curr = None 1556 self._next = None 1557 self._prev = None 1558 self._prev_comments = None 1559 self._pipe_cte_counter = 0 1560 1561 def parse( 1562 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1563 ) -> t.List[t.Optional[exp.Expression]]: 1564 """ 1565 Parses a list of tokens and returns a list of syntax trees, one tree 1566 per parsed SQL statement. 1567 1568 Args: 1569 raw_tokens: The list of tokens. 1570 sql: The original SQL string, used to produce helpful debug messages. 1571 1572 Returns: 1573 The list of the produced syntax trees. 1574 """ 1575 return self._parse( 1576 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1577 ) 1578 1579 def parse_into( 1580 self, 1581 expression_types: exp.IntoType, 1582 raw_tokens: t.List[Token], 1583 sql: t.Optional[str] = None, 1584 ) -> t.List[t.Optional[exp.Expression]]: 1585 """ 1586 Parses a list of tokens into a given Expression type. If a collection of Expression 1587 types is given instead, this method will try to parse the token list into each one 1588 of them, stopping at the first for which the parsing succeeds. 1589 1590 Args: 1591 expression_types: The expression type(s) to try and parse the token list into. 1592 raw_tokens: The list of tokens. 1593 sql: The original SQL string, used to produce helpful debug messages. 1594 1595 Returns: 1596 The target Expression. 1597 """ 1598 errors = [] 1599 for expression_type in ensure_list(expression_types): 1600 parser = self.EXPRESSION_PARSERS.get(expression_type) 1601 if not parser: 1602 raise TypeError(f"No parser registered for {expression_type}") 1603 1604 try: 1605 return self._parse(parser, raw_tokens, sql) 1606 except ParseError as e: 1607 e.errors[0]["into_expression"] = expression_type 1608 errors.append(e) 1609 1610 raise ParseError( 1611 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1612 errors=merge_errors(errors), 1613 ) from errors[-1] 1614 1615 def _parse( 1616 self, 1617 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1618 raw_tokens: t.List[Token], 1619 sql: t.Optional[str] = None, 1620 ) -> t.List[t.Optional[exp.Expression]]: 1621 self.reset() 1622 self.sql = sql or "" 1623 1624 total = len(raw_tokens) 1625 chunks: t.List[t.List[Token]] = [[]] 1626 1627 for i, token in enumerate(raw_tokens): 1628 if token.token_type == TokenType.SEMICOLON: 1629 if token.comments: 1630 chunks.append([token]) 1631 1632 if i < total - 1: 1633 chunks.append([]) 1634 else: 1635 chunks[-1].append(token) 1636 1637 expressions = [] 1638 1639 for tokens in chunks: 1640 self._index = -1 1641 self._tokens = tokens 1642 self._advance() 1643 1644 expressions.append(parse_method(self)) 1645 1646 if self._index < len(self._tokens): 1647 self.raise_error("Invalid expression / Unexpected token") 1648 1649 self.check_errors() 1650 1651 return expressions 1652 1653 def check_errors(self) -> None: 1654 """Logs or raises any found errors, depending on the chosen error level setting.""" 1655 if self.error_level == ErrorLevel.WARN: 1656 for error in self.errors: 1657 logger.error(str(error)) 1658 elif self.error_level == ErrorLevel.RAISE and self.errors: 1659 raise ParseError( 1660 concat_messages(self.errors, self.max_errors), 1661 errors=merge_errors(self.errors), 1662 ) 1663 1664 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1665 """ 1666 Appends an error in the list of recorded errors or raises it, depending on the chosen 1667 error level setting. 1668 """ 1669 token = token or self._curr or self._prev or Token.string("") 1670 start = token.start 1671 end = token.end + 1 1672 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1673 highlight = self.sql[start:end] 1674 end_context = self.sql[end : end + self.error_message_context] 1675 1676 error = ParseError.new( 1677 f"{message}. Line {token.line}, Col: {token.col}.\n" 1678 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1679 description=message, 1680 line=token.line, 1681 col=token.col, 1682 start_context=start_context, 1683 highlight=highlight, 1684 end_context=end_context, 1685 ) 1686 1687 if self.error_level == ErrorLevel.IMMEDIATE: 1688 raise error 1689 1690 self.errors.append(error) 1691 1692 def expression( 1693 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1694 ) -> E: 1695 """ 1696 Creates a new, validated Expression. 1697 1698 Args: 1699 exp_class: The expression class to instantiate. 1700 comments: An optional list of comments to attach to the expression. 1701 kwargs: The arguments to set for the expression along with their respective values. 1702 1703 Returns: 1704 The target expression. 1705 """ 1706 instance = exp_class(**kwargs) 1707 instance.add_comments(comments) if comments else self._add_comments(instance) 1708 return self.validate_expression(instance) 1709 1710 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1711 if expression and self._prev_comments: 1712 expression.add_comments(self._prev_comments) 1713 self._prev_comments = None 1714 1715 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1716 """ 1717 Validates an Expression, making sure that all its mandatory arguments are set. 1718 1719 Args: 1720 expression: The expression to validate. 1721 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1722 1723 Returns: 1724 The validated expression. 1725 """ 1726 if self.error_level != ErrorLevel.IGNORE: 1727 for error_message in expression.error_messages(args): 1728 self.raise_error(error_message) 1729 1730 return expression 1731 1732 def _find_sql(self, start: Token, end: Token) -> str: 1733 return self.sql[start.start : end.end + 1] 1734 1735 def _is_connected(self) -> bool: 1736 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1737 1738 def _advance(self, times: int = 1) -> None: 1739 self._index += times 1740 self._curr = seq_get(self._tokens, self._index) 1741 self._next = seq_get(self._tokens, self._index + 1) 1742 1743 if self._index > 0: 1744 self._prev = self._tokens[self._index - 1] 1745 self._prev_comments = self._prev.comments 1746 else: 1747 self._prev = None 1748 self._prev_comments = None 1749 1750 def _retreat(self, index: int) -> None: 1751 if index != self._index: 1752 self._advance(index - self._index) 1753 1754 def _warn_unsupported(self) -> None: 1755 if len(self._tokens) <= 1: 1756 return 1757 1758 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1759 # interested in emitting a warning for the one being currently processed. 1760 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1761 1762 logger.warning( 1763 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1764 ) 1765 1766 def _parse_command(self) -> exp.Command: 1767 self._warn_unsupported() 1768 return self.expression( 1769 exp.Command, 1770 comments=self._prev_comments, 1771 this=self._prev.text.upper(), 1772 expression=self._parse_string(), 1773 ) 1774 1775 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1776 """ 1777 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1778 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1779 solve this by setting & resetting the parser state accordingly 1780 """ 1781 index = self._index 1782 error_level = self.error_level 1783 1784 self.error_level = ErrorLevel.IMMEDIATE 1785 try: 1786 this = parse_method() 1787 except ParseError: 1788 this = None 1789 finally: 1790 if not this or retreat: 1791 self._retreat(index) 1792 self.error_level = error_level 1793 1794 return this 1795 1796 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1797 start = self._prev 1798 exists = self._parse_exists() if allow_exists else None 1799 1800 self._match(TokenType.ON) 1801 1802 materialized = self._match_text_seq("MATERIALIZED") 1803 kind = self._match_set(self.CREATABLES) and self._prev 1804 if not kind: 1805 return self._parse_as_command(start) 1806 1807 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1808 this = self._parse_user_defined_function(kind=kind.token_type) 1809 elif kind.token_type == TokenType.TABLE: 1810 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1811 elif kind.token_type == TokenType.COLUMN: 1812 this = self._parse_column() 1813 else: 1814 this = self._parse_id_var() 1815 1816 self._match(TokenType.IS) 1817 1818 return self.expression( 1819 exp.Comment, 1820 this=this, 1821 kind=kind.text, 1822 expression=self._parse_string(), 1823 exists=exists, 1824 materialized=materialized, 1825 ) 1826 1827 def _parse_to_table( 1828 self, 1829 ) -> exp.ToTableProperty: 1830 table = self._parse_table_parts(schema=True) 1831 return self.expression(exp.ToTableProperty, this=table) 1832 1833 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1834 def _parse_ttl(self) -> exp.Expression: 1835 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1836 this = self._parse_bitwise() 1837 1838 if self._match_text_seq("DELETE"): 1839 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1840 if self._match_text_seq("RECOMPRESS"): 1841 return self.expression( 1842 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1843 ) 1844 if self._match_text_seq("TO", "DISK"): 1845 return self.expression( 1846 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1847 ) 1848 if self._match_text_seq("TO", "VOLUME"): 1849 return self.expression( 1850 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1851 ) 1852 1853 return this 1854 1855 expressions = self._parse_csv(_parse_ttl_action) 1856 where = self._parse_where() 1857 group = self._parse_group() 1858 1859 aggregates = None 1860 if group and self._match(TokenType.SET): 1861 aggregates = self._parse_csv(self._parse_set_item) 1862 1863 return self.expression( 1864 exp.MergeTreeTTL, 1865 expressions=expressions, 1866 where=where, 1867 group=group, 1868 aggregates=aggregates, 1869 ) 1870 1871 def _parse_statement(self) -> t.Optional[exp.Expression]: 1872 if self._curr is None: 1873 return None 1874 1875 if self._match_set(self.STATEMENT_PARSERS): 1876 comments = self._prev_comments 1877 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1878 stmt.add_comments(comments, prepend=True) 1879 return stmt 1880 1881 if self._match_set(self.dialect.tokenizer.COMMANDS): 1882 return self._parse_command() 1883 1884 expression = self._parse_expression() 1885 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1886 return self._parse_query_modifiers(expression) 1887 1888 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1889 start = self._prev 1890 temporary = self._match(TokenType.TEMPORARY) 1891 materialized = self._match_text_seq("MATERIALIZED") 1892 1893 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1894 if not kind: 1895 return self._parse_as_command(start) 1896 1897 concurrently = self._match_text_seq("CONCURRENTLY") 1898 if_exists = exists or self._parse_exists() 1899 1900 if kind == "COLUMN": 1901 this = self._parse_column() 1902 else: 1903 this = self._parse_table_parts( 1904 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1905 ) 1906 1907 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1908 1909 if self._match(TokenType.L_PAREN, advance=False): 1910 expressions = self._parse_wrapped_csv(self._parse_types) 1911 else: 1912 expressions = None 1913 1914 return self.expression( 1915 exp.Drop, 1916 exists=if_exists, 1917 this=this, 1918 expressions=expressions, 1919 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1920 temporary=temporary, 1921 materialized=materialized, 1922 cascade=self._match_text_seq("CASCADE"), 1923 constraints=self._match_text_seq("CONSTRAINTS"), 1924 purge=self._match_text_seq("PURGE"), 1925 cluster=cluster, 1926 concurrently=concurrently, 1927 ) 1928 1929 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1930 return ( 1931 self._match_text_seq("IF") 1932 and (not not_ or self._match(TokenType.NOT)) 1933 and self._match(TokenType.EXISTS) 1934 ) 1935 1936 def _parse_create(self) -> exp.Create | exp.Command: 1937 # Note: this can't be None because we've matched a statement parser 1938 start = self._prev 1939 1940 replace = ( 1941 start.token_type == TokenType.REPLACE 1942 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1943 or self._match_pair(TokenType.OR, TokenType.ALTER) 1944 ) 1945 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1946 1947 unique = self._match(TokenType.UNIQUE) 1948 1949 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1950 clustered = True 1951 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1952 "COLUMNSTORE" 1953 ): 1954 clustered = False 1955 else: 1956 clustered = None 1957 1958 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1959 self._advance() 1960 1961 properties = None 1962 create_token = self._match_set(self.CREATABLES) and self._prev 1963 1964 if not create_token: 1965 # exp.Properties.Location.POST_CREATE 1966 properties = self._parse_properties() 1967 create_token = self._match_set(self.CREATABLES) and self._prev 1968 1969 if not properties or not create_token: 1970 return self._parse_as_command(start) 1971 1972 concurrently = self._match_text_seq("CONCURRENTLY") 1973 exists = self._parse_exists(not_=True) 1974 this = None 1975 expression: t.Optional[exp.Expression] = None 1976 indexes = None 1977 no_schema_binding = None 1978 begin = None 1979 end = None 1980 clone = None 1981 1982 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1983 nonlocal properties 1984 if properties and temp_props: 1985 properties.expressions.extend(temp_props.expressions) 1986 elif temp_props: 1987 properties = temp_props 1988 1989 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1990 this = self._parse_user_defined_function(kind=create_token.token_type) 1991 1992 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1993 extend_props(self._parse_properties()) 1994 1995 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1996 extend_props(self._parse_properties()) 1997 1998 if not expression: 1999 if self._match(TokenType.COMMAND): 2000 expression = self._parse_as_command(self._prev) 2001 else: 2002 begin = self._match(TokenType.BEGIN) 2003 return_ = self._match_text_seq("RETURN") 2004 2005 if self._match(TokenType.STRING, advance=False): 2006 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2007 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2008 expression = self._parse_string() 2009 extend_props(self._parse_properties()) 2010 else: 2011 expression = self._parse_user_defined_function_expression() 2012 2013 end = self._match_text_seq("END") 2014 2015 if return_: 2016 expression = self.expression(exp.Return, this=expression) 2017 elif create_token.token_type == TokenType.INDEX: 2018 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2019 if not self._match(TokenType.ON): 2020 index = self._parse_id_var() 2021 anonymous = False 2022 else: 2023 index = None 2024 anonymous = True 2025 2026 this = self._parse_index(index=index, anonymous=anonymous) 2027 elif create_token.token_type in self.DB_CREATABLES: 2028 table_parts = self._parse_table_parts( 2029 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2030 ) 2031 2032 # exp.Properties.Location.POST_NAME 2033 self._match(TokenType.COMMA) 2034 extend_props(self._parse_properties(before=True)) 2035 2036 this = self._parse_schema(this=table_parts) 2037 2038 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2039 extend_props(self._parse_properties()) 2040 2041 has_alias = self._match(TokenType.ALIAS) 2042 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2043 # exp.Properties.Location.POST_ALIAS 2044 extend_props(self._parse_properties()) 2045 2046 if create_token.token_type == TokenType.SEQUENCE: 2047 expression = self._parse_types() 2048 extend_props(self._parse_properties()) 2049 else: 2050 expression = self._parse_ddl_select() 2051 2052 # Some dialects also support using a table as an alias instead of a SELECT. 2053 # Here we fallback to this as an alternative. 2054 if not expression and has_alias: 2055 expression = self._try_parse(self._parse_table_parts) 2056 2057 if create_token.token_type == TokenType.TABLE: 2058 # exp.Properties.Location.POST_EXPRESSION 2059 extend_props(self._parse_properties()) 2060 2061 indexes = [] 2062 while True: 2063 index = self._parse_index() 2064 2065 # exp.Properties.Location.POST_INDEX 2066 extend_props(self._parse_properties()) 2067 if not index: 2068 break 2069 else: 2070 self._match(TokenType.COMMA) 2071 indexes.append(index) 2072 elif create_token.token_type == TokenType.VIEW: 2073 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2074 no_schema_binding = True 2075 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2076 extend_props(self._parse_properties()) 2077 2078 shallow = self._match_text_seq("SHALLOW") 2079 2080 if self._match_texts(self.CLONE_KEYWORDS): 2081 copy = self._prev.text.lower() == "copy" 2082 clone = self.expression( 2083 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2084 ) 2085 2086 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2087 return self._parse_as_command(start) 2088 2089 create_kind_text = create_token.text.upper() 2090 return self.expression( 2091 exp.Create, 2092 this=this, 2093 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2094 replace=replace, 2095 refresh=refresh, 2096 unique=unique, 2097 expression=expression, 2098 exists=exists, 2099 properties=properties, 2100 indexes=indexes, 2101 no_schema_binding=no_schema_binding, 2102 begin=begin, 2103 end=end, 2104 clone=clone, 2105 concurrently=concurrently, 2106 clustered=clustered, 2107 ) 2108 2109 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2110 seq = exp.SequenceProperties() 2111 2112 options = [] 2113 index = self._index 2114 2115 while self._curr: 2116 self._match(TokenType.COMMA) 2117 if self._match_text_seq("INCREMENT"): 2118 self._match_text_seq("BY") 2119 self._match_text_seq("=") 2120 seq.set("increment", self._parse_term()) 2121 elif self._match_text_seq("MINVALUE"): 2122 seq.set("minvalue", self._parse_term()) 2123 elif self._match_text_seq("MAXVALUE"): 2124 seq.set("maxvalue", self._parse_term()) 2125 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2126 self._match_text_seq("=") 2127 seq.set("start", self._parse_term()) 2128 elif self._match_text_seq("CACHE"): 2129 # T-SQL allows empty CACHE which is initialized dynamically 2130 seq.set("cache", self._parse_number() or True) 2131 elif self._match_text_seq("OWNED", "BY"): 2132 # "OWNED BY NONE" is the default 2133 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2134 else: 2135 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2136 if opt: 2137 options.append(opt) 2138 else: 2139 break 2140 2141 seq.set("options", options if options else None) 2142 return None if self._index == index else seq 2143 2144 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2145 # only used for teradata currently 2146 self._match(TokenType.COMMA) 2147 2148 kwargs = { 2149 "no": self._match_text_seq("NO"), 2150 "dual": self._match_text_seq("DUAL"), 2151 "before": self._match_text_seq("BEFORE"), 2152 "default": self._match_text_seq("DEFAULT"), 2153 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2154 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2155 "after": self._match_text_seq("AFTER"), 2156 "minimum": self._match_texts(("MIN", "MINIMUM")), 2157 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2158 } 2159 2160 if self._match_texts(self.PROPERTY_PARSERS): 2161 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2162 try: 2163 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2164 except TypeError: 2165 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2166 2167 return None 2168 2169 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2170 return self._parse_wrapped_csv(self._parse_property) 2171 2172 def _parse_property(self) -> t.Optional[exp.Expression]: 2173 if self._match_texts(self.PROPERTY_PARSERS): 2174 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2175 2176 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2177 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2178 2179 if self._match_text_seq("COMPOUND", "SORTKEY"): 2180 return self._parse_sortkey(compound=True) 2181 2182 if self._match_text_seq("SQL", "SECURITY"): 2183 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2184 2185 index = self._index 2186 key = self._parse_column() 2187 2188 if not self._match(TokenType.EQ): 2189 self._retreat(index) 2190 return self._parse_sequence_properties() 2191 2192 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2193 if isinstance(key, exp.Column): 2194 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2195 2196 value = self._parse_bitwise() or self._parse_var(any_token=True) 2197 2198 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2199 if isinstance(value, exp.Column): 2200 value = exp.var(value.name) 2201 2202 return self.expression(exp.Property, this=key, value=value) 2203 2204 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2205 if self._match_text_seq("BY"): 2206 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2207 2208 self._match(TokenType.ALIAS) 2209 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2210 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2211 2212 return self.expression( 2213 exp.FileFormatProperty, 2214 this=( 2215 self.expression( 2216 exp.InputOutputFormat, 2217 input_format=input_format, 2218 output_format=output_format, 2219 ) 2220 if input_format or output_format 2221 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2222 ), 2223 ) 2224 2225 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2226 field = self._parse_field() 2227 if isinstance(field, exp.Identifier) and not field.quoted: 2228 field = exp.var(field) 2229 2230 return field 2231 2232 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2233 self._match(TokenType.EQ) 2234 self._match(TokenType.ALIAS) 2235 2236 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2237 2238 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2239 properties = [] 2240 while True: 2241 if before: 2242 prop = self._parse_property_before() 2243 else: 2244 prop = self._parse_property() 2245 if not prop: 2246 break 2247 for p in ensure_list(prop): 2248 properties.append(p) 2249 2250 if properties: 2251 return self.expression(exp.Properties, expressions=properties) 2252 2253 return None 2254 2255 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2256 return self.expression( 2257 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2258 ) 2259 2260 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2261 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2262 security_specifier = self._prev.text.upper() 2263 return self.expression(exp.SecurityProperty, this=security_specifier) 2264 return None 2265 2266 def _parse_settings_property(self) -> exp.SettingsProperty: 2267 return self.expression( 2268 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2269 ) 2270 2271 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2272 if self._index >= 2: 2273 pre_volatile_token = self._tokens[self._index - 2] 2274 else: 2275 pre_volatile_token = None 2276 2277 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2278 return exp.VolatileProperty() 2279 2280 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2281 2282 def _parse_retention_period(self) -> exp.Var: 2283 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2284 number = self._parse_number() 2285 number_str = f"{number} " if number else "" 2286 unit = self._parse_var(any_token=True) 2287 return exp.var(f"{number_str}{unit}") 2288 2289 def _parse_system_versioning_property( 2290 self, with_: bool = False 2291 ) -> exp.WithSystemVersioningProperty: 2292 self._match(TokenType.EQ) 2293 prop = self.expression( 2294 exp.WithSystemVersioningProperty, 2295 **{ # type: ignore 2296 "on": True, 2297 "with": with_, 2298 }, 2299 ) 2300 2301 if self._match_text_seq("OFF"): 2302 prop.set("on", False) 2303 return prop 2304 2305 self._match(TokenType.ON) 2306 if self._match(TokenType.L_PAREN): 2307 while self._curr and not self._match(TokenType.R_PAREN): 2308 if self._match_text_seq("HISTORY_TABLE", "="): 2309 prop.set("this", self._parse_table_parts()) 2310 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2311 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2312 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2313 prop.set("retention_period", self._parse_retention_period()) 2314 2315 self._match(TokenType.COMMA) 2316 2317 return prop 2318 2319 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2320 self._match(TokenType.EQ) 2321 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2322 prop = self.expression(exp.DataDeletionProperty, on=on) 2323 2324 if self._match(TokenType.L_PAREN): 2325 while self._curr and not self._match(TokenType.R_PAREN): 2326 if self._match_text_seq("FILTER_COLUMN", "="): 2327 prop.set("filter_column", self._parse_column()) 2328 elif self._match_text_seq("RETENTION_PERIOD", "="): 2329 prop.set("retention_period", self._parse_retention_period()) 2330 2331 self._match(TokenType.COMMA) 2332 2333 return prop 2334 2335 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2336 kind = "HASH" 2337 expressions: t.Optional[t.List[exp.Expression]] = None 2338 if self._match_text_seq("BY", "HASH"): 2339 expressions = self._parse_wrapped_csv(self._parse_id_var) 2340 elif self._match_text_seq("BY", "RANDOM"): 2341 kind = "RANDOM" 2342 2343 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2344 buckets: t.Optional[exp.Expression] = None 2345 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2346 buckets = self._parse_number() 2347 2348 return self.expression( 2349 exp.DistributedByProperty, 2350 expressions=expressions, 2351 kind=kind, 2352 buckets=buckets, 2353 order=self._parse_order(), 2354 ) 2355 2356 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2357 self._match_text_seq("KEY") 2358 expressions = self._parse_wrapped_id_vars() 2359 return self.expression(expr_type, expressions=expressions) 2360 2361 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2362 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2363 prop = self._parse_system_versioning_property(with_=True) 2364 self._match_r_paren() 2365 return prop 2366 2367 if self._match(TokenType.L_PAREN, advance=False): 2368 return self._parse_wrapped_properties() 2369 2370 if self._match_text_seq("JOURNAL"): 2371 return self._parse_withjournaltable() 2372 2373 if self._match_texts(self.VIEW_ATTRIBUTES): 2374 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2375 2376 if self._match_text_seq("DATA"): 2377 return self._parse_withdata(no=False) 2378 elif self._match_text_seq("NO", "DATA"): 2379 return self._parse_withdata(no=True) 2380 2381 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2382 return self._parse_serde_properties(with_=True) 2383 2384 if self._match(TokenType.SCHEMA): 2385 return self.expression( 2386 exp.WithSchemaBindingProperty, 2387 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2388 ) 2389 2390 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2391 return self.expression( 2392 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2393 ) 2394 2395 if not self._next: 2396 return None 2397 2398 return self._parse_withisolatedloading() 2399 2400 def _parse_procedure_option(self) -> exp.Expression | None: 2401 if self._match_text_seq("EXECUTE", "AS"): 2402 return self.expression( 2403 exp.ExecuteAsProperty, 2404 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2405 or self._parse_string(), 2406 ) 2407 2408 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2409 2410 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2411 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2412 self._match(TokenType.EQ) 2413 2414 user = self._parse_id_var() 2415 self._match(TokenType.PARAMETER) 2416 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2417 2418 if not user or not host: 2419 return None 2420 2421 return exp.DefinerProperty(this=f"{user}@{host}") 2422 2423 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2424 self._match(TokenType.TABLE) 2425 self._match(TokenType.EQ) 2426 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2427 2428 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2429 return self.expression(exp.LogProperty, no=no) 2430 2431 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2432 return self.expression(exp.JournalProperty, **kwargs) 2433 2434 def _parse_checksum(self) -> exp.ChecksumProperty: 2435 self._match(TokenType.EQ) 2436 2437 on = None 2438 if self._match(TokenType.ON): 2439 on = True 2440 elif self._match_text_seq("OFF"): 2441 on = False 2442 2443 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2444 2445 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2446 return self.expression( 2447 exp.Cluster, 2448 expressions=( 2449 self._parse_wrapped_csv(self._parse_ordered) 2450 if wrapped 2451 else self._parse_csv(self._parse_ordered) 2452 ), 2453 ) 2454 2455 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2456 self._match_text_seq("BY") 2457 2458 self._match_l_paren() 2459 expressions = self._parse_csv(self._parse_column) 2460 self._match_r_paren() 2461 2462 if self._match_text_seq("SORTED", "BY"): 2463 self._match_l_paren() 2464 sorted_by = self._parse_csv(self._parse_ordered) 2465 self._match_r_paren() 2466 else: 2467 sorted_by = None 2468 2469 self._match(TokenType.INTO) 2470 buckets = self._parse_number() 2471 self._match_text_seq("BUCKETS") 2472 2473 return self.expression( 2474 exp.ClusteredByProperty, 2475 expressions=expressions, 2476 sorted_by=sorted_by, 2477 buckets=buckets, 2478 ) 2479 2480 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2481 if not self._match_text_seq("GRANTS"): 2482 self._retreat(self._index - 1) 2483 return None 2484 2485 return self.expression(exp.CopyGrantsProperty) 2486 2487 def _parse_freespace(self) -> exp.FreespaceProperty: 2488 self._match(TokenType.EQ) 2489 return self.expression( 2490 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2491 ) 2492 2493 def _parse_mergeblockratio( 2494 self, no: bool = False, default: bool = False 2495 ) -> exp.MergeBlockRatioProperty: 2496 if self._match(TokenType.EQ): 2497 return self.expression( 2498 exp.MergeBlockRatioProperty, 2499 this=self._parse_number(), 2500 percent=self._match(TokenType.PERCENT), 2501 ) 2502 2503 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2504 2505 def _parse_datablocksize( 2506 self, 2507 default: t.Optional[bool] = None, 2508 minimum: t.Optional[bool] = None, 2509 maximum: t.Optional[bool] = None, 2510 ) -> exp.DataBlocksizeProperty: 2511 self._match(TokenType.EQ) 2512 size = self._parse_number() 2513 2514 units = None 2515 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2516 units = self._prev.text 2517 2518 return self.expression( 2519 exp.DataBlocksizeProperty, 2520 size=size, 2521 units=units, 2522 default=default, 2523 minimum=minimum, 2524 maximum=maximum, 2525 ) 2526 2527 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2528 self._match(TokenType.EQ) 2529 always = self._match_text_seq("ALWAYS") 2530 manual = self._match_text_seq("MANUAL") 2531 never = self._match_text_seq("NEVER") 2532 default = self._match_text_seq("DEFAULT") 2533 2534 autotemp = None 2535 if self._match_text_seq("AUTOTEMP"): 2536 autotemp = self._parse_schema() 2537 2538 return self.expression( 2539 exp.BlockCompressionProperty, 2540 always=always, 2541 manual=manual, 2542 never=never, 2543 default=default, 2544 autotemp=autotemp, 2545 ) 2546 2547 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2548 index = self._index 2549 no = self._match_text_seq("NO") 2550 concurrent = self._match_text_seq("CONCURRENT") 2551 2552 if not self._match_text_seq("ISOLATED", "LOADING"): 2553 self._retreat(index) 2554 return None 2555 2556 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2557 return self.expression( 2558 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2559 ) 2560 2561 def _parse_locking(self) -> exp.LockingProperty: 2562 if self._match(TokenType.TABLE): 2563 kind = "TABLE" 2564 elif self._match(TokenType.VIEW): 2565 kind = "VIEW" 2566 elif self._match(TokenType.ROW): 2567 kind = "ROW" 2568 elif self._match_text_seq("DATABASE"): 2569 kind = "DATABASE" 2570 else: 2571 kind = None 2572 2573 if kind in ("DATABASE", "TABLE", "VIEW"): 2574 this = self._parse_table_parts() 2575 else: 2576 this = None 2577 2578 if self._match(TokenType.FOR): 2579 for_or_in = "FOR" 2580 elif self._match(TokenType.IN): 2581 for_or_in = "IN" 2582 else: 2583 for_or_in = None 2584 2585 if self._match_text_seq("ACCESS"): 2586 lock_type = "ACCESS" 2587 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2588 lock_type = "EXCLUSIVE" 2589 elif self._match_text_seq("SHARE"): 2590 lock_type = "SHARE" 2591 elif self._match_text_seq("READ"): 2592 lock_type = "READ" 2593 elif self._match_text_seq("WRITE"): 2594 lock_type = "WRITE" 2595 elif self._match_text_seq("CHECKSUM"): 2596 lock_type = "CHECKSUM" 2597 else: 2598 lock_type = None 2599 2600 override = self._match_text_seq("OVERRIDE") 2601 2602 return self.expression( 2603 exp.LockingProperty, 2604 this=this, 2605 kind=kind, 2606 for_or_in=for_or_in, 2607 lock_type=lock_type, 2608 override=override, 2609 ) 2610 2611 def _parse_partition_by(self) -> t.List[exp.Expression]: 2612 if self._match(TokenType.PARTITION_BY): 2613 return self._parse_csv(self._parse_assignment) 2614 return [] 2615 2616 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2617 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2618 if self._match_text_seq("MINVALUE"): 2619 return exp.var("MINVALUE") 2620 if self._match_text_seq("MAXVALUE"): 2621 return exp.var("MAXVALUE") 2622 return self._parse_bitwise() 2623 2624 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2625 expression = None 2626 from_expressions = None 2627 to_expressions = None 2628 2629 if self._match(TokenType.IN): 2630 this = self._parse_wrapped_csv(self._parse_bitwise) 2631 elif self._match(TokenType.FROM): 2632 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2633 self._match_text_seq("TO") 2634 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2635 elif self._match_text_seq("WITH", "(", "MODULUS"): 2636 this = self._parse_number() 2637 self._match_text_seq(",", "REMAINDER") 2638 expression = self._parse_number() 2639 self._match_r_paren() 2640 else: 2641 self.raise_error("Failed to parse partition bound spec.") 2642 2643 return self.expression( 2644 exp.PartitionBoundSpec, 2645 this=this, 2646 expression=expression, 2647 from_expressions=from_expressions, 2648 to_expressions=to_expressions, 2649 ) 2650 2651 # https://www.postgresql.org/docs/current/sql-createtable.html 2652 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2653 if not self._match_text_seq("OF"): 2654 self._retreat(self._index - 1) 2655 return None 2656 2657 this = self._parse_table(schema=True) 2658 2659 if self._match(TokenType.DEFAULT): 2660 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2661 elif self._match_text_seq("FOR", "VALUES"): 2662 expression = self._parse_partition_bound_spec() 2663 else: 2664 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2665 2666 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2667 2668 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2669 self._match(TokenType.EQ) 2670 return self.expression( 2671 exp.PartitionedByProperty, 2672 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2673 ) 2674 2675 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2676 if self._match_text_seq("AND", "STATISTICS"): 2677 statistics = True 2678 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2679 statistics = False 2680 else: 2681 statistics = None 2682 2683 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2684 2685 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2686 if self._match_text_seq("SQL"): 2687 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2688 return None 2689 2690 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2691 if self._match_text_seq("SQL", "DATA"): 2692 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2693 return None 2694 2695 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2696 if self._match_text_seq("PRIMARY", "INDEX"): 2697 return exp.NoPrimaryIndexProperty() 2698 if self._match_text_seq("SQL"): 2699 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2700 return None 2701 2702 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2703 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2704 return exp.OnCommitProperty() 2705 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2706 return exp.OnCommitProperty(delete=True) 2707 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2708 2709 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2710 if self._match_text_seq("SQL", "DATA"): 2711 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2712 return None 2713 2714 def _parse_distkey(self) -> exp.DistKeyProperty: 2715 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2716 2717 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2718 table = self._parse_table(schema=True) 2719 2720 options = [] 2721 while self._match_texts(("INCLUDING", "EXCLUDING")): 2722 this = self._prev.text.upper() 2723 2724 id_var = self._parse_id_var() 2725 if not id_var: 2726 return None 2727 2728 options.append( 2729 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2730 ) 2731 2732 return self.expression(exp.LikeProperty, this=table, expressions=options) 2733 2734 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2735 return self.expression( 2736 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2737 ) 2738 2739 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2740 self._match(TokenType.EQ) 2741 return self.expression( 2742 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2743 ) 2744 2745 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2746 self._match_text_seq("WITH", "CONNECTION") 2747 return self.expression( 2748 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2749 ) 2750 2751 def _parse_returns(self) -> exp.ReturnsProperty: 2752 value: t.Optional[exp.Expression] 2753 null = None 2754 is_table = self._match(TokenType.TABLE) 2755 2756 if is_table: 2757 if self._match(TokenType.LT): 2758 value = self.expression( 2759 exp.Schema, 2760 this="TABLE", 2761 expressions=self._parse_csv(self._parse_struct_types), 2762 ) 2763 if not self._match(TokenType.GT): 2764 self.raise_error("Expecting >") 2765 else: 2766 value = self._parse_schema(exp.var("TABLE")) 2767 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2768 null = True 2769 value = None 2770 else: 2771 value = self._parse_types() 2772 2773 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2774 2775 def _parse_describe(self) -> exp.Describe: 2776 kind = self._match_set(self.CREATABLES) and self._prev.text 2777 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2778 if self._match(TokenType.DOT): 2779 style = None 2780 self._retreat(self._index - 2) 2781 2782 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2783 2784 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2785 this = self._parse_statement() 2786 else: 2787 this = self._parse_table(schema=True) 2788 2789 properties = self._parse_properties() 2790 expressions = properties.expressions if properties else None 2791 partition = self._parse_partition() 2792 return self.expression( 2793 exp.Describe, 2794 this=this, 2795 style=style, 2796 kind=kind, 2797 expressions=expressions, 2798 partition=partition, 2799 format=format, 2800 ) 2801 2802 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2803 kind = self._prev.text.upper() 2804 expressions = [] 2805 2806 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2807 if self._match(TokenType.WHEN): 2808 expression = self._parse_disjunction() 2809 self._match(TokenType.THEN) 2810 else: 2811 expression = None 2812 2813 else_ = self._match(TokenType.ELSE) 2814 2815 if not self._match(TokenType.INTO): 2816 return None 2817 2818 return self.expression( 2819 exp.ConditionalInsert, 2820 this=self.expression( 2821 exp.Insert, 2822 this=self._parse_table(schema=True), 2823 expression=self._parse_derived_table_values(), 2824 ), 2825 expression=expression, 2826 else_=else_, 2827 ) 2828 2829 expression = parse_conditional_insert() 2830 while expression is not None: 2831 expressions.append(expression) 2832 expression = parse_conditional_insert() 2833 2834 return self.expression( 2835 exp.MultitableInserts, 2836 kind=kind, 2837 comments=comments, 2838 expressions=expressions, 2839 source=self._parse_table(), 2840 ) 2841 2842 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2843 comments = [] 2844 hint = self._parse_hint() 2845 overwrite = self._match(TokenType.OVERWRITE) 2846 ignore = self._match(TokenType.IGNORE) 2847 local = self._match_text_seq("LOCAL") 2848 alternative = None 2849 is_function = None 2850 2851 if self._match_text_seq("DIRECTORY"): 2852 this: t.Optional[exp.Expression] = self.expression( 2853 exp.Directory, 2854 this=self._parse_var_or_string(), 2855 local=local, 2856 row_format=self._parse_row_format(match_row=True), 2857 ) 2858 else: 2859 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2860 comments += ensure_list(self._prev_comments) 2861 return self._parse_multitable_inserts(comments) 2862 2863 if self._match(TokenType.OR): 2864 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2865 2866 self._match(TokenType.INTO) 2867 comments += ensure_list(self._prev_comments) 2868 self._match(TokenType.TABLE) 2869 is_function = self._match(TokenType.FUNCTION) 2870 2871 this = ( 2872 self._parse_table(schema=True, parse_partition=True) 2873 if not is_function 2874 else self._parse_function() 2875 ) 2876 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2877 this.set("alias", self._parse_table_alias()) 2878 2879 returning = self._parse_returning() 2880 2881 return self.expression( 2882 exp.Insert, 2883 comments=comments, 2884 hint=hint, 2885 is_function=is_function, 2886 this=this, 2887 stored=self._match_text_seq("STORED") and self._parse_stored(), 2888 by_name=self._match_text_seq("BY", "NAME"), 2889 exists=self._parse_exists(), 2890 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2891 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2892 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2893 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2894 conflict=self._parse_on_conflict(), 2895 returning=returning or self._parse_returning(), 2896 overwrite=overwrite, 2897 alternative=alternative, 2898 ignore=ignore, 2899 source=self._match(TokenType.TABLE) and self._parse_table(), 2900 ) 2901 2902 def _parse_kill(self) -> exp.Kill: 2903 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2904 2905 return self.expression( 2906 exp.Kill, 2907 this=self._parse_primary(), 2908 kind=kind, 2909 ) 2910 2911 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2912 conflict = self._match_text_seq("ON", "CONFLICT") 2913 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2914 2915 if not conflict and not duplicate: 2916 return None 2917 2918 conflict_keys = None 2919 constraint = None 2920 2921 if conflict: 2922 if self._match_text_seq("ON", "CONSTRAINT"): 2923 constraint = self._parse_id_var() 2924 elif self._match(TokenType.L_PAREN): 2925 conflict_keys = self._parse_csv(self._parse_id_var) 2926 self._match_r_paren() 2927 2928 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2929 if self._prev.token_type == TokenType.UPDATE: 2930 self._match(TokenType.SET) 2931 expressions = self._parse_csv(self._parse_equality) 2932 else: 2933 expressions = None 2934 2935 return self.expression( 2936 exp.OnConflict, 2937 duplicate=duplicate, 2938 expressions=expressions, 2939 action=action, 2940 conflict_keys=conflict_keys, 2941 constraint=constraint, 2942 where=self._parse_where(), 2943 ) 2944 2945 def _parse_returning(self) -> t.Optional[exp.Returning]: 2946 if not self._match(TokenType.RETURNING): 2947 return None 2948 return self.expression( 2949 exp.Returning, 2950 expressions=self._parse_csv(self._parse_expression), 2951 into=self._match(TokenType.INTO) and self._parse_table_part(), 2952 ) 2953 2954 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2955 if not self._match(TokenType.FORMAT): 2956 return None 2957 return self._parse_row_format() 2958 2959 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2960 index = self._index 2961 with_ = with_ or self._match_text_seq("WITH") 2962 2963 if not self._match(TokenType.SERDE_PROPERTIES): 2964 self._retreat(index) 2965 return None 2966 return self.expression( 2967 exp.SerdeProperties, 2968 **{ # type: ignore 2969 "expressions": self._parse_wrapped_properties(), 2970 "with": with_, 2971 }, 2972 ) 2973 2974 def _parse_row_format( 2975 self, match_row: bool = False 2976 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2977 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2978 return None 2979 2980 if self._match_text_seq("SERDE"): 2981 this = self._parse_string() 2982 2983 serde_properties = self._parse_serde_properties() 2984 2985 return self.expression( 2986 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2987 ) 2988 2989 self._match_text_seq("DELIMITED") 2990 2991 kwargs = {} 2992 2993 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2994 kwargs["fields"] = self._parse_string() 2995 if self._match_text_seq("ESCAPED", "BY"): 2996 kwargs["escaped"] = self._parse_string() 2997 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2998 kwargs["collection_items"] = self._parse_string() 2999 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3000 kwargs["map_keys"] = self._parse_string() 3001 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3002 kwargs["lines"] = self._parse_string() 3003 if self._match_text_seq("NULL", "DEFINED", "AS"): 3004 kwargs["null"] = self._parse_string() 3005 3006 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3007 3008 def _parse_load(self) -> exp.LoadData | exp.Command: 3009 if self._match_text_seq("DATA"): 3010 local = self._match_text_seq("LOCAL") 3011 self._match_text_seq("INPATH") 3012 inpath = self._parse_string() 3013 overwrite = self._match(TokenType.OVERWRITE) 3014 self._match_pair(TokenType.INTO, TokenType.TABLE) 3015 3016 return self.expression( 3017 exp.LoadData, 3018 this=self._parse_table(schema=True), 3019 local=local, 3020 overwrite=overwrite, 3021 inpath=inpath, 3022 partition=self._parse_partition(), 3023 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3024 serde=self._match_text_seq("SERDE") and self._parse_string(), 3025 ) 3026 return self._parse_as_command(self._prev) 3027 3028 def _parse_delete(self) -> exp.Delete: 3029 # This handles MySQL's "Multiple-Table Syntax" 3030 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3031 tables = None 3032 if not self._match(TokenType.FROM, advance=False): 3033 tables = self._parse_csv(self._parse_table) or None 3034 3035 returning = self._parse_returning() 3036 3037 return self.expression( 3038 exp.Delete, 3039 tables=tables, 3040 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3041 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3042 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3043 where=self._parse_where(), 3044 returning=returning or self._parse_returning(), 3045 limit=self._parse_limit(), 3046 ) 3047 3048 def _parse_update(self) -> exp.Update: 3049 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3050 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3051 returning = self._parse_returning() 3052 return self.expression( 3053 exp.Update, 3054 **{ # type: ignore 3055 "this": this, 3056 "expressions": expressions, 3057 "from": self._parse_from(joins=True), 3058 "where": self._parse_where(), 3059 "returning": returning or self._parse_returning(), 3060 "order": self._parse_order(), 3061 "limit": self._parse_limit(), 3062 }, 3063 ) 3064 3065 def _parse_use(self) -> exp.Use: 3066 return self.expression( 3067 exp.Use, 3068 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3069 this=self._parse_table(schema=False), 3070 ) 3071 3072 def _parse_uncache(self) -> exp.Uncache: 3073 if not self._match(TokenType.TABLE): 3074 self.raise_error("Expecting TABLE after UNCACHE") 3075 3076 return self.expression( 3077 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3078 ) 3079 3080 def _parse_cache(self) -> exp.Cache: 3081 lazy = self._match_text_seq("LAZY") 3082 self._match(TokenType.TABLE) 3083 table = self._parse_table(schema=True) 3084 3085 options = [] 3086 if self._match_text_seq("OPTIONS"): 3087 self._match_l_paren() 3088 k = self._parse_string() 3089 self._match(TokenType.EQ) 3090 v = self._parse_string() 3091 options = [k, v] 3092 self._match_r_paren() 3093 3094 self._match(TokenType.ALIAS) 3095 return self.expression( 3096 exp.Cache, 3097 this=table, 3098 lazy=lazy, 3099 options=options, 3100 expression=self._parse_select(nested=True), 3101 ) 3102 3103 def _parse_partition(self) -> t.Optional[exp.Partition]: 3104 if not self._match_texts(self.PARTITION_KEYWORDS): 3105 return None 3106 3107 return self.expression( 3108 exp.Partition, 3109 subpartition=self._prev.text.upper() == "SUBPARTITION", 3110 expressions=self._parse_wrapped_csv(self._parse_assignment), 3111 ) 3112 3113 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3114 def _parse_value_expression() -> t.Optional[exp.Expression]: 3115 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3116 return exp.var(self._prev.text.upper()) 3117 return self._parse_expression() 3118 3119 if self._match(TokenType.L_PAREN): 3120 expressions = self._parse_csv(_parse_value_expression) 3121 self._match_r_paren() 3122 return self.expression(exp.Tuple, expressions=expressions) 3123 3124 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3125 expression = self._parse_expression() 3126 if expression: 3127 return self.expression(exp.Tuple, expressions=[expression]) 3128 return None 3129 3130 def _parse_projections(self) -> t.List[exp.Expression]: 3131 return self._parse_expressions() 3132 3133 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3134 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3135 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3136 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3137 ) 3138 elif self._match(TokenType.FROM): 3139 from_ = self._parse_from(skip_from_token=True) 3140 # Support parentheses for duckdb FROM-first syntax 3141 select = self._parse_select() 3142 if select: 3143 select.set("from", from_) 3144 this = select 3145 else: 3146 this = exp.select("*").from_(t.cast(exp.From, from_)) 3147 else: 3148 this = ( 3149 self._parse_table() 3150 if table 3151 else self._parse_select(nested=True, parse_set_operation=False) 3152 ) 3153 3154 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3155 # in case a modifier (e.g. join) is following 3156 if table and isinstance(this, exp.Values) and this.alias: 3157 alias = this.args["alias"].pop() 3158 this = exp.Table(this=this, alias=alias) 3159 3160 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3161 3162 return this 3163 3164 def _parse_select( 3165 self, 3166 nested: bool = False, 3167 table: bool = False, 3168 parse_subquery_alias: bool = True, 3169 parse_set_operation: bool = True, 3170 ) -> t.Optional[exp.Expression]: 3171 cte = self._parse_with() 3172 3173 if cte: 3174 this = self._parse_statement() 3175 3176 if not this: 3177 self.raise_error("Failed to parse any statement following CTE") 3178 return cte 3179 3180 if "with" in this.arg_types: 3181 this.set("with", cte) 3182 else: 3183 self.raise_error(f"{this.key} does not support CTE") 3184 this = cte 3185 3186 return this 3187 3188 # duckdb supports leading with FROM x 3189 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3190 3191 if self._match(TokenType.SELECT): 3192 comments = self._prev_comments 3193 3194 hint = self._parse_hint() 3195 3196 if self._next and not self._next.token_type == TokenType.DOT: 3197 all_ = self._match(TokenType.ALL) 3198 distinct = self._match_set(self.DISTINCT_TOKENS) 3199 else: 3200 all_, distinct = None, None 3201 3202 kind = ( 3203 self._match(TokenType.ALIAS) 3204 and self._match_texts(("STRUCT", "VALUE")) 3205 and self._prev.text.upper() 3206 ) 3207 3208 if distinct: 3209 distinct = self.expression( 3210 exp.Distinct, 3211 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3212 ) 3213 3214 if all_ and distinct: 3215 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3216 3217 operation_modifiers = [] 3218 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3219 operation_modifiers.append(exp.var(self._prev.text.upper())) 3220 3221 limit = self._parse_limit(top=True) 3222 projections = self._parse_projections() 3223 3224 this = self.expression( 3225 exp.Select, 3226 kind=kind, 3227 hint=hint, 3228 distinct=distinct, 3229 expressions=projections, 3230 limit=limit, 3231 operation_modifiers=operation_modifiers or None, 3232 ) 3233 this.comments = comments 3234 3235 into = self._parse_into() 3236 if into: 3237 this.set("into", into) 3238 3239 if not from_: 3240 from_ = self._parse_from() 3241 3242 if from_: 3243 this.set("from", from_) 3244 3245 this = self._parse_query_modifiers(this) 3246 elif (table or nested) and self._match(TokenType.L_PAREN): 3247 this = self._parse_wrapped_select(table=table) 3248 3249 # We return early here so that the UNION isn't attached to the subquery by the 3250 # following call to _parse_set_operations, but instead becomes the parent node 3251 self._match_r_paren() 3252 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3253 elif self._match(TokenType.VALUES, advance=False): 3254 this = self._parse_derived_table_values() 3255 elif from_: 3256 if self._match(TokenType.PIPE_GT, advance=False): 3257 return self._parse_pipe_syntax_query( 3258 exp.Select().from_(from_.this, append=False, copy=False) 3259 ) 3260 this = exp.select("*").from_(from_.this, copy=False) 3261 elif self._match(TokenType.SUMMARIZE): 3262 table = self._match(TokenType.TABLE) 3263 this = self._parse_select() or self._parse_string() or self._parse_table() 3264 return self.expression(exp.Summarize, this=this, table=table) 3265 elif self._match(TokenType.DESCRIBE): 3266 this = self._parse_describe() 3267 elif self._match_text_seq("STREAM"): 3268 this = self._parse_function() 3269 if this: 3270 this = self.expression(exp.Stream, this=this) 3271 else: 3272 self._retreat(self._index - 1) 3273 else: 3274 this = None 3275 3276 return self._parse_set_operations(this) if parse_set_operation else this 3277 3278 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3279 self._match_text_seq("SEARCH") 3280 3281 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3282 3283 if not kind: 3284 return None 3285 3286 self._match_text_seq("FIRST", "BY") 3287 3288 return self.expression( 3289 exp.RecursiveWithSearch, 3290 kind=kind, 3291 this=self._parse_id_var(), 3292 expression=self._match_text_seq("SET") and self._parse_id_var(), 3293 using=self._match_text_seq("USING") and self._parse_id_var(), 3294 ) 3295 3296 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3297 if not skip_with_token and not self._match(TokenType.WITH): 3298 return None 3299 3300 comments = self._prev_comments 3301 recursive = self._match(TokenType.RECURSIVE) 3302 3303 last_comments = None 3304 expressions = [] 3305 while True: 3306 cte = self._parse_cte() 3307 if isinstance(cte, exp.CTE): 3308 expressions.append(cte) 3309 if last_comments: 3310 cte.add_comments(last_comments) 3311 3312 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3313 break 3314 else: 3315 self._match(TokenType.WITH) 3316 3317 last_comments = self._prev_comments 3318 3319 return self.expression( 3320 exp.With, 3321 comments=comments, 3322 expressions=expressions, 3323 recursive=recursive, 3324 search=self._parse_recursive_with_search(), 3325 ) 3326 3327 def _parse_cte(self) -> t.Optional[exp.CTE]: 3328 index = self._index 3329 3330 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3331 if not alias or not alias.this: 3332 self.raise_error("Expected CTE to have alias") 3333 3334 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3335 self._retreat(index) 3336 return None 3337 3338 comments = self._prev_comments 3339 3340 if self._match_text_seq("NOT", "MATERIALIZED"): 3341 materialized = False 3342 elif self._match_text_seq("MATERIALIZED"): 3343 materialized = True 3344 else: 3345 materialized = None 3346 3347 cte = self.expression( 3348 exp.CTE, 3349 this=self._parse_wrapped(self._parse_statement), 3350 alias=alias, 3351 materialized=materialized, 3352 comments=comments, 3353 ) 3354 3355 if isinstance(cte.this, exp.Values): 3356 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3357 3358 return cte 3359 3360 def _parse_table_alias( 3361 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3362 ) -> t.Optional[exp.TableAlias]: 3363 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3364 # so this section tries to parse the clause version and if it fails, it treats the token 3365 # as an identifier (alias) 3366 if self._can_parse_limit_or_offset(): 3367 return None 3368 3369 any_token = self._match(TokenType.ALIAS) 3370 alias = ( 3371 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3372 or self._parse_string_as_identifier() 3373 ) 3374 3375 index = self._index 3376 if self._match(TokenType.L_PAREN): 3377 columns = self._parse_csv(self._parse_function_parameter) 3378 self._match_r_paren() if columns else self._retreat(index) 3379 else: 3380 columns = None 3381 3382 if not alias and not columns: 3383 return None 3384 3385 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3386 3387 # We bubble up comments from the Identifier to the TableAlias 3388 if isinstance(alias, exp.Identifier): 3389 table_alias.add_comments(alias.pop_comments()) 3390 3391 return table_alias 3392 3393 def _parse_subquery( 3394 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3395 ) -> t.Optional[exp.Subquery]: 3396 if not this: 3397 return None 3398 3399 return self.expression( 3400 exp.Subquery, 3401 this=this, 3402 pivots=self._parse_pivots(), 3403 alias=self._parse_table_alias() if parse_alias else None, 3404 sample=self._parse_table_sample(), 3405 ) 3406 3407 def _implicit_unnests_to_explicit(self, this: E) -> E: 3408 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3409 3410 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3411 for i, join in enumerate(this.args.get("joins") or []): 3412 table = join.this 3413 normalized_table = table.copy() 3414 normalized_table.meta["maybe_column"] = True 3415 normalized_table = _norm(normalized_table, dialect=self.dialect) 3416 3417 if isinstance(table, exp.Table) and not join.args.get("on"): 3418 if normalized_table.parts[0].name in refs: 3419 table_as_column = table.to_column() 3420 unnest = exp.Unnest(expressions=[table_as_column]) 3421 3422 # Table.to_column creates a parent Alias node that we want to convert to 3423 # a TableAlias and attach to the Unnest, so it matches the parser's output 3424 if isinstance(table.args.get("alias"), exp.TableAlias): 3425 table_as_column.replace(table_as_column.this) 3426 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3427 3428 table.replace(unnest) 3429 3430 refs.add(normalized_table.alias_or_name) 3431 3432 return this 3433 3434 def _parse_query_modifiers( 3435 self, this: t.Optional[exp.Expression] 3436 ) -> t.Optional[exp.Expression]: 3437 if isinstance(this, self.MODIFIABLES): 3438 for join in self._parse_joins(): 3439 this.append("joins", join) 3440 for lateral in iter(self._parse_lateral, None): 3441 this.append("laterals", lateral) 3442 3443 while True: 3444 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3445 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3446 key, expression = parser(self) 3447 3448 if expression: 3449 this.set(key, expression) 3450 if key == "limit": 3451 offset = expression.args.pop("offset", None) 3452 3453 if offset: 3454 offset = exp.Offset(expression=offset) 3455 this.set("offset", offset) 3456 3457 limit_by_expressions = expression.expressions 3458 expression.set("expressions", None) 3459 offset.set("expressions", limit_by_expressions) 3460 continue 3461 break 3462 3463 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3464 this = self._implicit_unnests_to_explicit(this) 3465 3466 return this 3467 3468 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3469 start = self._curr 3470 while self._curr: 3471 self._advance() 3472 3473 end = self._tokens[self._index - 1] 3474 return exp.Hint(expressions=[self._find_sql(start, end)]) 3475 3476 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3477 return self._parse_function_call() 3478 3479 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3480 start_index = self._index 3481 should_fallback_to_string = False 3482 3483 hints = [] 3484 try: 3485 for hint in iter( 3486 lambda: self._parse_csv( 3487 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3488 ), 3489 [], 3490 ): 3491 hints.extend(hint) 3492 except ParseError: 3493 should_fallback_to_string = True 3494 3495 if should_fallback_to_string or self._curr: 3496 self._retreat(start_index) 3497 return self._parse_hint_fallback_to_string() 3498 3499 return self.expression(exp.Hint, expressions=hints) 3500 3501 def _parse_hint(self) -> t.Optional[exp.Hint]: 3502 if self._match(TokenType.HINT) and self._prev_comments: 3503 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3504 3505 return None 3506 3507 def _parse_into(self) -> t.Optional[exp.Into]: 3508 if not self._match(TokenType.INTO): 3509 return None 3510 3511 temp = self._match(TokenType.TEMPORARY) 3512 unlogged = self._match_text_seq("UNLOGGED") 3513 self._match(TokenType.TABLE) 3514 3515 return self.expression( 3516 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3517 ) 3518 3519 def _parse_from( 3520 self, joins: bool = False, skip_from_token: bool = False 3521 ) -> t.Optional[exp.From]: 3522 if not skip_from_token and not self._match(TokenType.FROM): 3523 return None 3524 3525 return self.expression( 3526 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3527 ) 3528 3529 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3530 return self.expression( 3531 exp.MatchRecognizeMeasure, 3532 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3533 this=self._parse_expression(), 3534 ) 3535 3536 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3537 if not self._match(TokenType.MATCH_RECOGNIZE): 3538 return None 3539 3540 self._match_l_paren() 3541 3542 partition = self._parse_partition_by() 3543 order = self._parse_order() 3544 3545 measures = ( 3546 self._parse_csv(self._parse_match_recognize_measure) 3547 if self._match_text_seq("MEASURES") 3548 else None 3549 ) 3550 3551 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3552 rows = exp.var("ONE ROW PER MATCH") 3553 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3554 text = "ALL ROWS PER MATCH" 3555 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3556 text += " SHOW EMPTY MATCHES" 3557 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3558 text += " OMIT EMPTY MATCHES" 3559 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3560 text += " WITH UNMATCHED ROWS" 3561 rows = exp.var(text) 3562 else: 3563 rows = None 3564 3565 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3566 text = "AFTER MATCH SKIP" 3567 if self._match_text_seq("PAST", "LAST", "ROW"): 3568 text += " PAST LAST ROW" 3569 elif self._match_text_seq("TO", "NEXT", "ROW"): 3570 text += " TO NEXT ROW" 3571 elif self._match_text_seq("TO", "FIRST"): 3572 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3573 elif self._match_text_seq("TO", "LAST"): 3574 text += f" TO LAST {self._advance_any().text}" # type: ignore 3575 after = exp.var(text) 3576 else: 3577 after = None 3578 3579 if self._match_text_seq("PATTERN"): 3580 self._match_l_paren() 3581 3582 if not self._curr: 3583 self.raise_error("Expecting )", self._curr) 3584 3585 paren = 1 3586 start = self._curr 3587 3588 while self._curr and paren > 0: 3589 if self._curr.token_type == TokenType.L_PAREN: 3590 paren += 1 3591 if self._curr.token_type == TokenType.R_PAREN: 3592 paren -= 1 3593 3594 end = self._prev 3595 self._advance() 3596 3597 if paren > 0: 3598 self.raise_error("Expecting )", self._curr) 3599 3600 pattern = exp.var(self._find_sql(start, end)) 3601 else: 3602 pattern = None 3603 3604 define = ( 3605 self._parse_csv(self._parse_name_as_expression) 3606 if self._match_text_seq("DEFINE") 3607 else None 3608 ) 3609 3610 self._match_r_paren() 3611 3612 return self.expression( 3613 exp.MatchRecognize, 3614 partition_by=partition, 3615 order=order, 3616 measures=measures, 3617 rows=rows, 3618 after=after, 3619 pattern=pattern, 3620 define=define, 3621 alias=self._parse_table_alias(), 3622 ) 3623 3624 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3625 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3626 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3627 cross_apply = False 3628 3629 if cross_apply is not None: 3630 this = self._parse_select(table=True) 3631 view = None 3632 outer = None 3633 elif self._match(TokenType.LATERAL): 3634 this = self._parse_select(table=True) 3635 view = self._match(TokenType.VIEW) 3636 outer = self._match(TokenType.OUTER) 3637 else: 3638 return None 3639 3640 if not this: 3641 this = ( 3642 self._parse_unnest() 3643 or self._parse_function() 3644 or self._parse_id_var(any_token=False) 3645 ) 3646 3647 while self._match(TokenType.DOT): 3648 this = exp.Dot( 3649 this=this, 3650 expression=self._parse_function() or self._parse_id_var(any_token=False), 3651 ) 3652 3653 ordinality: t.Optional[bool] = None 3654 3655 if view: 3656 table = self._parse_id_var(any_token=False) 3657 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3658 table_alias: t.Optional[exp.TableAlias] = self.expression( 3659 exp.TableAlias, this=table, columns=columns 3660 ) 3661 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3662 # We move the alias from the lateral's child node to the lateral itself 3663 table_alias = this.args["alias"].pop() 3664 else: 3665 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3666 table_alias = self._parse_table_alias() 3667 3668 return self.expression( 3669 exp.Lateral, 3670 this=this, 3671 view=view, 3672 outer=outer, 3673 alias=table_alias, 3674 cross_apply=cross_apply, 3675 ordinality=ordinality, 3676 ) 3677 3678 def _parse_join_parts( 3679 self, 3680 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3681 return ( 3682 self._match_set(self.JOIN_METHODS) and self._prev, 3683 self._match_set(self.JOIN_SIDES) and self._prev, 3684 self._match_set(self.JOIN_KINDS) and self._prev, 3685 ) 3686 3687 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3688 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3689 this = self._parse_column() 3690 if isinstance(this, exp.Column): 3691 return this.this 3692 return this 3693 3694 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3695 3696 def _parse_join( 3697 self, skip_join_token: bool = False, parse_bracket: bool = False 3698 ) -> t.Optional[exp.Join]: 3699 if self._match(TokenType.COMMA): 3700 table = self._try_parse(self._parse_table) 3701 if table: 3702 return self.expression(exp.Join, this=table) 3703 return None 3704 3705 index = self._index 3706 method, side, kind = self._parse_join_parts() 3707 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3708 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3709 3710 if not skip_join_token and not join: 3711 self._retreat(index) 3712 kind = None 3713 method = None 3714 side = None 3715 3716 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3717 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3718 3719 if not skip_join_token and not join and not outer_apply and not cross_apply: 3720 return None 3721 3722 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3723 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3724 kwargs["expressions"] = self._parse_csv( 3725 lambda: self._parse_table(parse_bracket=parse_bracket) 3726 ) 3727 3728 if method: 3729 kwargs["method"] = method.text 3730 if side: 3731 kwargs["side"] = side.text 3732 if kind: 3733 kwargs["kind"] = kind.text 3734 if hint: 3735 kwargs["hint"] = hint 3736 3737 if self._match(TokenType.MATCH_CONDITION): 3738 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3739 3740 if self._match(TokenType.ON): 3741 kwargs["on"] = self._parse_assignment() 3742 elif self._match(TokenType.USING): 3743 kwargs["using"] = self._parse_using_identifiers() 3744 elif ( 3745 not (outer_apply or cross_apply) 3746 and not isinstance(kwargs["this"], exp.Unnest) 3747 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3748 ): 3749 index = self._index 3750 joins: t.Optional[list] = list(self._parse_joins()) 3751 3752 if joins and self._match(TokenType.ON): 3753 kwargs["on"] = self._parse_assignment() 3754 elif joins and self._match(TokenType.USING): 3755 kwargs["using"] = self._parse_using_identifiers() 3756 else: 3757 joins = None 3758 self._retreat(index) 3759 3760 kwargs["this"].set("joins", joins if joins else None) 3761 3762 kwargs["pivots"] = self._parse_pivots() 3763 3764 comments = [c for token in (method, side, kind) if token for c in token.comments] 3765 return self.expression(exp.Join, comments=comments, **kwargs) 3766 3767 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3768 this = self._parse_assignment() 3769 3770 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3771 return this 3772 3773 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3774 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3775 3776 return this 3777 3778 def _parse_index_params(self) -> exp.IndexParameters: 3779 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3780 3781 if self._match(TokenType.L_PAREN, advance=False): 3782 columns = self._parse_wrapped_csv(self._parse_with_operator) 3783 else: 3784 columns = None 3785 3786 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3787 partition_by = self._parse_partition_by() 3788 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3789 tablespace = ( 3790 self._parse_var(any_token=True) 3791 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3792 else None 3793 ) 3794 where = self._parse_where() 3795 3796 on = self._parse_field() if self._match(TokenType.ON) else None 3797 3798 return self.expression( 3799 exp.IndexParameters, 3800 using=using, 3801 columns=columns, 3802 include=include, 3803 partition_by=partition_by, 3804 where=where, 3805 with_storage=with_storage, 3806 tablespace=tablespace, 3807 on=on, 3808 ) 3809 3810 def _parse_index( 3811 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3812 ) -> t.Optional[exp.Index]: 3813 if index or anonymous: 3814 unique = None 3815 primary = None 3816 amp = None 3817 3818 self._match(TokenType.ON) 3819 self._match(TokenType.TABLE) # hive 3820 table = self._parse_table_parts(schema=True) 3821 else: 3822 unique = self._match(TokenType.UNIQUE) 3823 primary = self._match_text_seq("PRIMARY") 3824 amp = self._match_text_seq("AMP") 3825 3826 if not self._match(TokenType.INDEX): 3827 return None 3828 3829 index = self._parse_id_var() 3830 table = None 3831 3832 params = self._parse_index_params() 3833 3834 return self.expression( 3835 exp.Index, 3836 this=index, 3837 table=table, 3838 unique=unique, 3839 primary=primary, 3840 amp=amp, 3841 params=params, 3842 ) 3843 3844 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3845 hints: t.List[exp.Expression] = [] 3846 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3847 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3848 hints.append( 3849 self.expression( 3850 exp.WithTableHint, 3851 expressions=self._parse_csv( 3852 lambda: self._parse_function() or self._parse_var(any_token=True) 3853 ), 3854 ) 3855 ) 3856 self._match_r_paren() 3857 else: 3858 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3859 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3860 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3861 3862 self._match_set((TokenType.INDEX, TokenType.KEY)) 3863 if self._match(TokenType.FOR): 3864 hint.set("target", self._advance_any() and self._prev.text.upper()) 3865 3866 hint.set("expressions", self._parse_wrapped_id_vars()) 3867 hints.append(hint) 3868 3869 return hints or None 3870 3871 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3872 return ( 3873 (not schema and self._parse_function(optional_parens=False)) 3874 or self._parse_id_var(any_token=False) 3875 or self._parse_string_as_identifier() 3876 or self._parse_placeholder() 3877 ) 3878 3879 def _parse_table_parts( 3880 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3881 ) -> exp.Table: 3882 catalog = None 3883 db = None 3884 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3885 3886 while self._match(TokenType.DOT): 3887 if catalog: 3888 # This allows nesting the table in arbitrarily many dot expressions if needed 3889 table = self.expression( 3890 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3891 ) 3892 else: 3893 catalog = db 3894 db = table 3895 # "" used for tsql FROM a..b case 3896 table = self._parse_table_part(schema=schema) or "" 3897 3898 if ( 3899 wildcard 3900 and self._is_connected() 3901 and (isinstance(table, exp.Identifier) or not table) 3902 and self._match(TokenType.STAR) 3903 ): 3904 if isinstance(table, exp.Identifier): 3905 table.args["this"] += "*" 3906 else: 3907 table = exp.Identifier(this="*") 3908 3909 # We bubble up comments from the Identifier to the Table 3910 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3911 3912 if is_db_reference: 3913 catalog = db 3914 db = table 3915 table = None 3916 3917 if not table and not is_db_reference: 3918 self.raise_error(f"Expected table name but got {self._curr}") 3919 if not db and is_db_reference: 3920 self.raise_error(f"Expected database name but got {self._curr}") 3921 3922 table = self.expression( 3923 exp.Table, 3924 comments=comments, 3925 this=table, 3926 db=db, 3927 catalog=catalog, 3928 ) 3929 3930 changes = self._parse_changes() 3931 if changes: 3932 table.set("changes", changes) 3933 3934 at_before = self._parse_historical_data() 3935 if at_before: 3936 table.set("when", at_before) 3937 3938 pivots = self._parse_pivots() 3939 if pivots: 3940 table.set("pivots", pivots) 3941 3942 return table 3943 3944 def _parse_table( 3945 self, 3946 schema: bool = False, 3947 joins: bool = False, 3948 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3949 parse_bracket: bool = False, 3950 is_db_reference: bool = False, 3951 parse_partition: bool = False, 3952 ) -> t.Optional[exp.Expression]: 3953 lateral = self._parse_lateral() 3954 if lateral: 3955 return lateral 3956 3957 unnest = self._parse_unnest() 3958 if unnest: 3959 return unnest 3960 3961 values = self._parse_derived_table_values() 3962 if values: 3963 return values 3964 3965 subquery = self._parse_select(table=True) 3966 if subquery: 3967 if not subquery.args.get("pivots"): 3968 subquery.set("pivots", self._parse_pivots()) 3969 return subquery 3970 3971 bracket = parse_bracket and self._parse_bracket(None) 3972 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3973 3974 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3975 self._parse_table 3976 ) 3977 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3978 3979 only = self._match(TokenType.ONLY) 3980 3981 this = t.cast( 3982 exp.Expression, 3983 bracket 3984 or rows_from 3985 or self._parse_bracket( 3986 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3987 ), 3988 ) 3989 3990 if only: 3991 this.set("only", only) 3992 3993 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3994 self._match_text_seq("*") 3995 3996 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3997 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3998 this.set("partition", self._parse_partition()) 3999 4000 if schema: 4001 return self._parse_schema(this=this) 4002 4003 version = self._parse_version() 4004 4005 if version: 4006 this.set("version", version) 4007 4008 if self.dialect.ALIAS_POST_TABLESAMPLE: 4009 this.set("sample", self._parse_table_sample()) 4010 4011 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4012 if alias: 4013 this.set("alias", alias) 4014 4015 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4016 return self.expression( 4017 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4018 ) 4019 4020 this.set("hints", self._parse_table_hints()) 4021 4022 if not this.args.get("pivots"): 4023 this.set("pivots", self._parse_pivots()) 4024 4025 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4026 this.set("sample", self._parse_table_sample()) 4027 4028 if joins: 4029 for join in self._parse_joins(): 4030 this.append("joins", join) 4031 4032 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4033 this.set("ordinality", True) 4034 this.set("alias", self._parse_table_alias()) 4035 4036 return this 4037 4038 def _parse_version(self) -> t.Optional[exp.Version]: 4039 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4040 this = "TIMESTAMP" 4041 elif self._match(TokenType.VERSION_SNAPSHOT): 4042 this = "VERSION" 4043 else: 4044 return None 4045 4046 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4047 kind = self._prev.text.upper() 4048 start = self._parse_bitwise() 4049 self._match_texts(("TO", "AND")) 4050 end = self._parse_bitwise() 4051 expression: t.Optional[exp.Expression] = self.expression( 4052 exp.Tuple, expressions=[start, end] 4053 ) 4054 elif self._match_text_seq("CONTAINED", "IN"): 4055 kind = "CONTAINED IN" 4056 expression = self.expression( 4057 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4058 ) 4059 elif self._match(TokenType.ALL): 4060 kind = "ALL" 4061 expression = None 4062 else: 4063 self._match_text_seq("AS", "OF") 4064 kind = "AS OF" 4065 expression = self._parse_type() 4066 4067 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4068 4069 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4070 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4071 index = self._index 4072 historical_data = None 4073 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4074 this = self._prev.text.upper() 4075 kind = ( 4076 self._match(TokenType.L_PAREN) 4077 and self._match_texts(self.HISTORICAL_DATA_KIND) 4078 and self._prev.text.upper() 4079 ) 4080 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4081 4082 if expression: 4083 self._match_r_paren() 4084 historical_data = self.expression( 4085 exp.HistoricalData, this=this, kind=kind, expression=expression 4086 ) 4087 else: 4088 self._retreat(index) 4089 4090 return historical_data 4091 4092 def _parse_changes(self) -> t.Optional[exp.Changes]: 4093 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4094 return None 4095 4096 information = self._parse_var(any_token=True) 4097 self._match_r_paren() 4098 4099 return self.expression( 4100 exp.Changes, 4101 information=information, 4102 at_before=self._parse_historical_data(), 4103 end=self._parse_historical_data(), 4104 ) 4105 4106 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4107 if not self._match(TokenType.UNNEST): 4108 return None 4109 4110 expressions = self._parse_wrapped_csv(self._parse_equality) 4111 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4112 4113 alias = self._parse_table_alias() if with_alias else None 4114 4115 if alias: 4116 if self.dialect.UNNEST_COLUMN_ONLY: 4117 if alias.args.get("columns"): 4118 self.raise_error("Unexpected extra column alias in unnest.") 4119 4120 alias.set("columns", [alias.this]) 4121 alias.set("this", None) 4122 4123 columns = alias.args.get("columns") or [] 4124 if offset and len(expressions) < len(columns): 4125 offset = columns.pop() 4126 4127 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4128 self._match(TokenType.ALIAS) 4129 offset = self._parse_id_var( 4130 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4131 ) or exp.to_identifier("offset") 4132 4133 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4134 4135 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4136 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4137 if not is_derived and not ( 4138 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4139 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4140 ): 4141 return None 4142 4143 expressions = self._parse_csv(self._parse_value) 4144 alias = self._parse_table_alias() 4145 4146 if is_derived: 4147 self._match_r_paren() 4148 4149 return self.expression( 4150 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4151 ) 4152 4153 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4154 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4155 as_modifier and self._match_text_seq("USING", "SAMPLE") 4156 ): 4157 return None 4158 4159 bucket_numerator = None 4160 bucket_denominator = None 4161 bucket_field = None 4162 percent = None 4163 size = None 4164 seed = None 4165 4166 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4167 matched_l_paren = self._match(TokenType.L_PAREN) 4168 4169 if self.TABLESAMPLE_CSV: 4170 num = None 4171 expressions = self._parse_csv(self._parse_primary) 4172 else: 4173 expressions = None 4174 num = ( 4175 self._parse_factor() 4176 if self._match(TokenType.NUMBER, advance=False) 4177 else self._parse_primary() or self._parse_placeholder() 4178 ) 4179 4180 if self._match_text_seq("BUCKET"): 4181 bucket_numerator = self._parse_number() 4182 self._match_text_seq("OUT", "OF") 4183 bucket_denominator = bucket_denominator = self._parse_number() 4184 self._match(TokenType.ON) 4185 bucket_field = self._parse_field() 4186 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4187 percent = num 4188 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4189 size = num 4190 else: 4191 percent = num 4192 4193 if matched_l_paren: 4194 self._match_r_paren() 4195 4196 if self._match(TokenType.L_PAREN): 4197 method = self._parse_var(upper=True) 4198 seed = self._match(TokenType.COMMA) and self._parse_number() 4199 self._match_r_paren() 4200 elif self._match_texts(("SEED", "REPEATABLE")): 4201 seed = self._parse_wrapped(self._parse_number) 4202 4203 if not method and self.DEFAULT_SAMPLING_METHOD: 4204 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4205 4206 return self.expression( 4207 exp.TableSample, 4208 expressions=expressions, 4209 method=method, 4210 bucket_numerator=bucket_numerator, 4211 bucket_denominator=bucket_denominator, 4212 bucket_field=bucket_field, 4213 percent=percent, 4214 size=size, 4215 seed=seed, 4216 ) 4217 4218 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4219 return list(iter(self._parse_pivot, None)) or None 4220 4221 def _parse_joins(self) -> t.Iterator[exp.Join]: 4222 return iter(self._parse_join, None) 4223 4224 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4225 if not self._match(TokenType.INTO): 4226 return None 4227 4228 return self.expression( 4229 exp.UnpivotColumns, 4230 this=self._match_text_seq("NAME") and self._parse_column(), 4231 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4232 ) 4233 4234 # https://duckdb.org/docs/sql/statements/pivot 4235 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4236 def _parse_on() -> t.Optional[exp.Expression]: 4237 this = self._parse_bitwise() 4238 4239 if self._match(TokenType.IN): 4240 # PIVOT ... ON col IN (row_val1, row_val2) 4241 return self._parse_in(this) 4242 if self._match(TokenType.ALIAS, advance=False): 4243 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4244 return self._parse_alias(this) 4245 4246 return this 4247 4248 this = self._parse_table() 4249 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4250 into = self._parse_unpivot_columns() 4251 using = self._match(TokenType.USING) and self._parse_csv( 4252 lambda: self._parse_alias(self._parse_function()) 4253 ) 4254 group = self._parse_group() 4255 4256 return self.expression( 4257 exp.Pivot, 4258 this=this, 4259 expressions=expressions, 4260 using=using, 4261 group=group, 4262 unpivot=is_unpivot, 4263 into=into, 4264 ) 4265 4266 def _parse_pivot_in(self) -> exp.In: 4267 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4268 this = self._parse_select_or_expression() 4269 4270 self._match(TokenType.ALIAS) 4271 alias = self._parse_bitwise() 4272 if alias: 4273 if isinstance(alias, exp.Column) and not alias.db: 4274 alias = alias.this 4275 return self.expression(exp.PivotAlias, this=this, alias=alias) 4276 4277 return this 4278 4279 value = self._parse_column() 4280 4281 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4282 self.raise_error("Expecting IN (") 4283 4284 if self._match(TokenType.ANY): 4285 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4286 else: 4287 exprs = self._parse_csv(_parse_aliased_expression) 4288 4289 self._match_r_paren() 4290 return self.expression(exp.In, this=value, expressions=exprs) 4291 4292 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4293 index = self._index 4294 include_nulls = None 4295 4296 if self._match(TokenType.PIVOT): 4297 unpivot = False 4298 elif self._match(TokenType.UNPIVOT): 4299 unpivot = True 4300 4301 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4302 if self._match_text_seq("INCLUDE", "NULLS"): 4303 include_nulls = True 4304 elif self._match_text_seq("EXCLUDE", "NULLS"): 4305 include_nulls = False 4306 else: 4307 return None 4308 4309 expressions = [] 4310 4311 if not self._match(TokenType.L_PAREN): 4312 self._retreat(index) 4313 return None 4314 4315 if unpivot: 4316 expressions = self._parse_csv(self._parse_column) 4317 else: 4318 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4319 4320 if not expressions: 4321 self.raise_error("Failed to parse PIVOT's aggregation list") 4322 4323 if not self._match(TokenType.FOR): 4324 self.raise_error("Expecting FOR") 4325 4326 fields = [] 4327 while True: 4328 field = self._try_parse(self._parse_pivot_in) 4329 if not field: 4330 break 4331 fields.append(field) 4332 4333 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4334 self._parse_bitwise 4335 ) 4336 4337 group = self._parse_group() 4338 4339 self._match_r_paren() 4340 4341 pivot = self.expression( 4342 exp.Pivot, 4343 expressions=expressions, 4344 fields=fields, 4345 unpivot=unpivot, 4346 include_nulls=include_nulls, 4347 default_on_null=default_on_null, 4348 group=group, 4349 ) 4350 4351 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4352 pivot.set("alias", self._parse_table_alias()) 4353 4354 if not unpivot: 4355 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4356 4357 columns: t.List[exp.Expression] = [] 4358 all_fields = [] 4359 for pivot_field in pivot.fields: 4360 pivot_field_expressions = pivot_field.expressions 4361 4362 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4363 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4364 continue 4365 4366 all_fields.append( 4367 [ 4368 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4369 for fld in pivot_field_expressions 4370 ] 4371 ) 4372 4373 if all_fields: 4374 if names: 4375 all_fields.append(names) 4376 4377 # Generate all possible combinations of the pivot columns 4378 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4379 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4380 for fld_parts_tuple in itertools.product(*all_fields): 4381 fld_parts = list(fld_parts_tuple) 4382 4383 if names and self.PREFIXED_PIVOT_COLUMNS: 4384 # Move the "name" to the front of the list 4385 fld_parts.insert(0, fld_parts.pop(-1)) 4386 4387 columns.append(exp.to_identifier("_".join(fld_parts))) 4388 4389 pivot.set("columns", columns) 4390 4391 return pivot 4392 4393 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4394 return [agg.alias for agg in aggregations if agg.alias] 4395 4396 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4397 if not skip_where_token and not self._match(TokenType.PREWHERE): 4398 return None 4399 4400 return self.expression( 4401 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4402 ) 4403 4404 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4405 if not skip_where_token and not self._match(TokenType.WHERE): 4406 return None 4407 4408 return self.expression( 4409 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4410 ) 4411 4412 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4413 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4414 return None 4415 4416 elements: t.Dict[str, t.Any] = defaultdict(list) 4417 4418 if self._match(TokenType.ALL): 4419 elements["all"] = True 4420 elif self._match(TokenType.DISTINCT): 4421 elements["all"] = False 4422 4423 while True: 4424 index = self._index 4425 4426 elements["expressions"].extend( 4427 self._parse_csv( 4428 lambda: None 4429 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4430 else self._parse_assignment() 4431 ) 4432 ) 4433 4434 before_with_index = self._index 4435 with_prefix = self._match(TokenType.WITH) 4436 4437 if self._match(TokenType.ROLLUP): 4438 elements["rollup"].append( 4439 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4440 ) 4441 elif self._match(TokenType.CUBE): 4442 elements["cube"].append( 4443 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4444 ) 4445 elif self._match(TokenType.GROUPING_SETS): 4446 elements["grouping_sets"].append( 4447 self.expression( 4448 exp.GroupingSets, 4449 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4450 ) 4451 ) 4452 elif self._match_text_seq("TOTALS"): 4453 elements["totals"] = True # type: ignore 4454 4455 if before_with_index <= self._index <= before_with_index + 1: 4456 self._retreat(before_with_index) 4457 break 4458 4459 if index == self._index: 4460 break 4461 4462 return self.expression(exp.Group, **elements) # type: ignore 4463 4464 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4465 return self.expression( 4466 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4467 ) 4468 4469 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4470 if self._match(TokenType.L_PAREN): 4471 grouping_set = self._parse_csv(self._parse_column) 4472 self._match_r_paren() 4473 return self.expression(exp.Tuple, expressions=grouping_set) 4474 4475 return self._parse_column() 4476 4477 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4478 if not skip_having_token and not self._match(TokenType.HAVING): 4479 return None 4480 return self.expression(exp.Having, this=self._parse_assignment()) 4481 4482 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4483 if not self._match(TokenType.QUALIFY): 4484 return None 4485 return self.expression(exp.Qualify, this=self._parse_assignment()) 4486 4487 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4488 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4489 exp.Prior, this=self._parse_bitwise() 4490 ) 4491 connect = self._parse_assignment() 4492 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4493 return connect 4494 4495 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4496 if skip_start_token: 4497 start = None 4498 elif self._match(TokenType.START_WITH): 4499 start = self._parse_assignment() 4500 else: 4501 return None 4502 4503 self._match(TokenType.CONNECT_BY) 4504 nocycle = self._match_text_seq("NOCYCLE") 4505 connect = self._parse_connect_with_prior() 4506 4507 if not start and self._match(TokenType.START_WITH): 4508 start = self._parse_assignment() 4509 4510 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4511 4512 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4513 this = self._parse_id_var(any_token=True) 4514 if self._match(TokenType.ALIAS): 4515 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4516 return this 4517 4518 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4519 if self._match_text_seq("INTERPOLATE"): 4520 return self._parse_wrapped_csv(self._parse_name_as_expression) 4521 return None 4522 4523 def _parse_order( 4524 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4525 ) -> t.Optional[exp.Expression]: 4526 siblings = None 4527 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4528 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4529 return this 4530 4531 siblings = True 4532 4533 return self.expression( 4534 exp.Order, 4535 this=this, 4536 expressions=self._parse_csv(self._parse_ordered), 4537 siblings=siblings, 4538 ) 4539 4540 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4541 if not self._match(token): 4542 return None 4543 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4544 4545 def _parse_ordered( 4546 self, parse_method: t.Optional[t.Callable] = None 4547 ) -> t.Optional[exp.Ordered]: 4548 this = parse_method() if parse_method else self._parse_assignment() 4549 if not this: 4550 return None 4551 4552 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4553 this = exp.var("ALL") 4554 4555 asc = self._match(TokenType.ASC) 4556 desc = self._match(TokenType.DESC) or (asc and False) 4557 4558 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4559 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4560 4561 nulls_first = is_nulls_first or False 4562 explicitly_null_ordered = is_nulls_first or is_nulls_last 4563 4564 if ( 4565 not explicitly_null_ordered 4566 and ( 4567 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4568 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4569 ) 4570 and self.dialect.NULL_ORDERING != "nulls_are_last" 4571 ): 4572 nulls_first = True 4573 4574 if self._match_text_seq("WITH", "FILL"): 4575 with_fill = self.expression( 4576 exp.WithFill, 4577 **{ # type: ignore 4578 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4579 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4580 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4581 "interpolate": self._parse_interpolate(), 4582 }, 4583 ) 4584 else: 4585 with_fill = None 4586 4587 return self.expression( 4588 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4589 ) 4590 4591 def _parse_limit_options(self) -> exp.LimitOptions: 4592 percent = self._match(TokenType.PERCENT) 4593 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4594 self._match_text_seq("ONLY") 4595 with_ties = self._match_text_seq("WITH", "TIES") 4596 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4597 4598 def _parse_limit( 4599 self, 4600 this: t.Optional[exp.Expression] = None, 4601 top: bool = False, 4602 skip_limit_token: bool = False, 4603 ) -> t.Optional[exp.Expression]: 4604 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4605 comments = self._prev_comments 4606 if top: 4607 limit_paren = self._match(TokenType.L_PAREN) 4608 expression = self._parse_term() if limit_paren else self._parse_number() 4609 4610 if limit_paren: 4611 self._match_r_paren() 4612 4613 limit_options = self._parse_limit_options() 4614 else: 4615 limit_options = None 4616 expression = self._parse_term() 4617 4618 if self._match(TokenType.COMMA): 4619 offset = expression 4620 expression = self._parse_term() 4621 else: 4622 offset = None 4623 4624 limit_exp = self.expression( 4625 exp.Limit, 4626 this=this, 4627 expression=expression, 4628 offset=offset, 4629 comments=comments, 4630 limit_options=limit_options, 4631 expressions=self._parse_limit_by(), 4632 ) 4633 4634 return limit_exp 4635 4636 if self._match(TokenType.FETCH): 4637 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4638 direction = self._prev.text.upper() if direction else "FIRST" 4639 4640 count = self._parse_field(tokens=self.FETCH_TOKENS) 4641 4642 return self.expression( 4643 exp.Fetch, 4644 direction=direction, 4645 count=count, 4646 limit_options=self._parse_limit_options(), 4647 ) 4648 4649 return this 4650 4651 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4652 if not self._match(TokenType.OFFSET): 4653 return this 4654 4655 count = self._parse_term() 4656 self._match_set((TokenType.ROW, TokenType.ROWS)) 4657 4658 return self.expression( 4659 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4660 ) 4661 4662 def _can_parse_limit_or_offset(self) -> bool: 4663 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4664 return False 4665 4666 index = self._index 4667 result = bool( 4668 self._try_parse(self._parse_limit, retreat=True) 4669 or self._try_parse(self._parse_offset, retreat=True) 4670 ) 4671 self._retreat(index) 4672 return result 4673 4674 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4675 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4676 4677 def _parse_locks(self) -> t.List[exp.Lock]: 4678 locks = [] 4679 while True: 4680 if self._match_text_seq("FOR", "UPDATE"): 4681 update = True 4682 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4683 "LOCK", "IN", "SHARE", "MODE" 4684 ): 4685 update = False 4686 else: 4687 break 4688 4689 expressions = None 4690 if self._match_text_seq("OF"): 4691 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4692 4693 wait: t.Optional[bool | exp.Expression] = None 4694 if self._match_text_seq("NOWAIT"): 4695 wait = True 4696 elif self._match_text_seq("WAIT"): 4697 wait = self._parse_primary() 4698 elif self._match_text_seq("SKIP", "LOCKED"): 4699 wait = False 4700 4701 locks.append( 4702 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4703 ) 4704 4705 return locks 4706 4707 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4708 start = self._index 4709 _, side_token, kind_token = self._parse_join_parts() 4710 4711 side = side_token.text if side_token else None 4712 kind = kind_token.text if kind_token else None 4713 4714 if not self._match_set(self.SET_OPERATIONS): 4715 self._retreat(start) 4716 return None 4717 4718 token_type = self._prev.token_type 4719 4720 if token_type == TokenType.UNION: 4721 operation: t.Type[exp.SetOperation] = exp.Union 4722 elif token_type == TokenType.EXCEPT: 4723 operation = exp.Except 4724 else: 4725 operation = exp.Intersect 4726 4727 comments = self._prev.comments 4728 4729 if self._match(TokenType.DISTINCT): 4730 distinct: t.Optional[bool] = True 4731 elif self._match(TokenType.ALL): 4732 distinct = False 4733 else: 4734 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4735 if distinct is None: 4736 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4737 4738 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4739 "STRICT", "CORRESPONDING" 4740 ) 4741 if self._match_text_seq("CORRESPONDING"): 4742 by_name = True 4743 if not side and not kind: 4744 kind = "INNER" 4745 4746 on_column_list = None 4747 if by_name and self._match_texts(("ON", "BY")): 4748 on_column_list = self._parse_wrapped_csv(self._parse_column) 4749 4750 expression = self._parse_select(nested=True, parse_set_operation=False) 4751 4752 return self.expression( 4753 operation, 4754 comments=comments, 4755 this=this, 4756 distinct=distinct, 4757 by_name=by_name, 4758 expression=expression, 4759 side=side, 4760 kind=kind, 4761 on=on_column_list, 4762 ) 4763 4764 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4765 while this: 4766 setop = self.parse_set_operation(this) 4767 if not setop: 4768 break 4769 this = setop 4770 4771 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4772 expression = this.expression 4773 4774 if expression: 4775 for arg in self.SET_OP_MODIFIERS: 4776 expr = expression.args.get(arg) 4777 if expr: 4778 this.set(arg, expr.pop()) 4779 4780 return this 4781 4782 def _parse_expression(self) -> t.Optional[exp.Expression]: 4783 return self._parse_alias(self._parse_assignment()) 4784 4785 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4786 this = self._parse_disjunction() 4787 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4788 # This allows us to parse <non-identifier token> := <expr> 4789 this = exp.column( 4790 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4791 ) 4792 4793 while self._match_set(self.ASSIGNMENT): 4794 if isinstance(this, exp.Column) and len(this.parts) == 1: 4795 this = this.this 4796 4797 this = self.expression( 4798 self.ASSIGNMENT[self._prev.token_type], 4799 this=this, 4800 comments=self._prev_comments, 4801 expression=self._parse_assignment(), 4802 ) 4803 4804 return this 4805 4806 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4807 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4808 4809 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4810 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4811 4812 def _parse_equality(self) -> t.Optional[exp.Expression]: 4813 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4814 4815 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4816 return self._parse_tokens(self._parse_range, self.COMPARISON) 4817 4818 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4819 this = this or self._parse_bitwise() 4820 negate = self._match(TokenType.NOT) 4821 4822 if self._match_set(self.RANGE_PARSERS): 4823 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4824 if not expression: 4825 return this 4826 4827 this = expression 4828 elif self._match(TokenType.ISNULL): 4829 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4830 4831 # Postgres supports ISNULL and NOTNULL for conditions. 4832 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4833 if self._match(TokenType.NOTNULL): 4834 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4835 this = self.expression(exp.Not, this=this) 4836 4837 if negate: 4838 this = self._negate_range(this) 4839 4840 if self._match(TokenType.IS): 4841 this = self._parse_is(this) 4842 4843 return this 4844 4845 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4846 if not this: 4847 return this 4848 4849 return self.expression(exp.Not, this=this) 4850 4851 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4852 index = self._index - 1 4853 negate = self._match(TokenType.NOT) 4854 4855 if self._match_text_seq("DISTINCT", "FROM"): 4856 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4857 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4858 4859 if self._match(TokenType.JSON): 4860 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4861 4862 if self._match_text_seq("WITH"): 4863 _with = True 4864 elif self._match_text_seq("WITHOUT"): 4865 _with = False 4866 else: 4867 _with = None 4868 4869 unique = self._match(TokenType.UNIQUE) 4870 self._match_text_seq("KEYS") 4871 expression: t.Optional[exp.Expression] = self.expression( 4872 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4873 ) 4874 else: 4875 expression = self._parse_primary() or self._parse_null() 4876 if not expression: 4877 self._retreat(index) 4878 return None 4879 4880 this = self.expression(exp.Is, this=this, expression=expression) 4881 return self.expression(exp.Not, this=this) if negate else this 4882 4883 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4884 unnest = self._parse_unnest(with_alias=False) 4885 if unnest: 4886 this = self.expression(exp.In, this=this, unnest=unnest) 4887 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4888 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4889 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4890 4891 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4892 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4893 else: 4894 this = self.expression(exp.In, this=this, expressions=expressions) 4895 4896 if matched_l_paren: 4897 self._match_r_paren(this) 4898 elif not self._match(TokenType.R_BRACKET, expression=this): 4899 self.raise_error("Expecting ]") 4900 else: 4901 this = self.expression(exp.In, this=this, field=self._parse_column()) 4902 4903 return this 4904 4905 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4906 low = self._parse_bitwise() 4907 self._match(TokenType.AND) 4908 high = self._parse_bitwise() 4909 return self.expression(exp.Between, this=this, low=low, high=high) 4910 4911 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4912 if not self._match(TokenType.ESCAPE): 4913 return this 4914 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4915 4916 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4917 index = self._index 4918 4919 if not self._match(TokenType.INTERVAL) and match_interval: 4920 return None 4921 4922 if self._match(TokenType.STRING, advance=False): 4923 this = self._parse_primary() 4924 else: 4925 this = self._parse_term() 4926 4927 if not this or ( 4928 isinstance(this, exp.Column) 4929 and not this.table 4930 and not this.this.quoted 4931 and this.name.upper() == "IS" 4932 ): 4933 self._retreat(index) 4934 return None 4935 4936 unit = self._parse_function() or ( 4937 not self._match(TokenType.ALIAS, advance=False) 4938 and self._parse_var(any_token=True, upper=True) 4939 ) 4940 4941 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4942 # each INTERVAL expression into this canonical form so it's easy to transpile 4943 if this and this.is_number: 4944 this = exp.Literal.string(this.to_py()) 4945 elif this and this.is_string: 4946 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4947 if parts and unit: 4948 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4949 unit = None 4950 self._retreat(self._index - 1) 4951 4952 if len(parts) == 1: 4953 this = exp.Literal.string(parts[0][0]) 4954 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4955 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4956 unit = self.expression( 4957 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4958 ) 4959 4960 interval = self.expression(exp.Interval, this=this, unit=unit) 4961 4962 index = self._index 4963 self._match(TokenType.PLUS) 4964 4965 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4966 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4967 return self.expression( 4968 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4969 ) 4970 4971 self._retreat(index) 4972 return interval 4973 4974 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4975 this = self._parse_term() 4976 4977 while True: 4978 if self._match_set(self.BITWISE): 4979 this = self.expression( 4980 self.BITWISE[self._prev.token_type], 4981 this=this, 4982 expression=self._parse_term(), 4983 ) 4984 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4985 this = self.expression( 4986 exp.DPipe, 4987 this=this, 4988 expression=self._parse_term(), 4989 safe=not self.dialect.STRICT_STRING_CONCAT, 4990 ) 4991 elif self._match(TokenType.DQMARK): 4992 this = self.expression( 4993 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4994 ) 4995 elif self._match_pair(TokenType.LT, TokenType.LT): 4996 this = self.expression( 4997 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4998 ) 4999 elif self._match_pair(TokenType.GT, TokenType.GT): 5000 this = self.expression( 5001 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5002 ) 5003 else: 5004 break 5005 5006 return this 5007 5008 def _parse_term(self) -> t.Optional[exp.Expression]: 5009 this = self._parse_factor() 5010 5011 while self._match_set(self.TERM): 5012 klass = self.TERM[self._prev.token_type] 5013 comments = self._prev_comments 5014 expression = self._parse_factor() 5015 5016 this = self.expression(klass, this=this, comments=comments, expression=expression) 5017 5018 if isinstance(this, exp.Collate): 5019 expr = this.expression 5020 5021 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5022 # fallback to Identifier / Var 5023 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5024 ident = expr.this 5025 if isinstance(ident, exp.Identifier): 5026 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5027 5028 return this 5029 5030 def _parse_factor(self) -> t.Optional[exp.Expression]: 5031 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5032 this = parse_method() 5033 5034 while self._match_set(self.FACTOR): 5035 klass = self.FACTOR[self._prev.token_type] 5036 comments = self._prev_comments 5037 expression = parse_method() 5038 5039 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5040 self._retreat(self._index - 1) 5041 return this 5042 5043 this = self.expression(klass, this=this, comments=comments, expression=expression) 5044 5045 if isinstance(this, exp.Div): 5046 this.args["typed"] = self.dialect.TYPED_DIVISION 5047 this.args["safe"] = self.dialect.SAFE_DIVISION 5048 5049 return this 5050 5051 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5052 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5053 5054 def _parse_unary(self) -> t.Optional[exp.Expression]: 5055 if self._match_set(self.UNARY_PARSERS): 5056 return self.UNARY_PARSERS[self._prev.token_type](self) 5057 return self._parse_at_time_zone(self._parse_type()) 5058 5059 def _parse_type( 5060 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5061 ) -> t.Optional[exp.Expression]: 5062 interval = parse_interval and self._parse_interval() 5063 if interval: 5064 return interval 5065 5066 index = self._index 5067 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5068 5069 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5070 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5071 if isinstance(data_type, exp.Cast): 5072 # This constructor can contain ops directly after it, for instance struct unnesting: 5073 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5074 return self._parse_column_ops(data_type) 5075 5076 if data_type: 5077 index2 = self._index 5078 this = self._parse_primary() 5079 5080 if isinstance(this, exp.Literal): 5081 this = self._parse_column_ops(this) 5082 5083 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5084 if parser: 5085 return parser(self, this, data_type) 5086 5087 return self.expression(exp.Cast, this=this, to=data_type) 5088 5089 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5090 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5091 # 5092 # If the index difference here is greater than 1, that means the parser itself must have 5093 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5094 # 5095 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5096 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5097 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5098 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5099 # 5100 # In these cases, we don't really want to return the converted type, but instead retreat 5101 # and try to parse a Column or Identifier in the section below. 5102 if data_type.expressions and index2 - index > 1: 5103 self._retreat(index2) 5104 return self._parse_column_ops(data_type) 5105 5106 self._retreat(index) 5107 5108 if fallback_to_identifier: 5109 return self._parse_id_var() 5110 5111 this = self._parse_column() 5112 return this and self._parse_column_ops(this) 5113 5114 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5115 this = self._parse_type() 5116 if not this: 5117 return None 5118 5119 if isinstance(this, exp.Column) and not this.table: 5120 this = exp.var(this.name.upper()) 5121 5122 return self.expression( 5123 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5124 ) 5125 5126 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5127 type_name = identifier.name 5128 5129 while self._match(TokenType.DOT): 5130 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5131 5132 return exp.DataType.build(type_name, udt=True) 5133 5134 def _parse_types( 5135 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5136 ) -> t.Optional[exp.Expression]: 5137 index = self._index 5138 5139 this: t.Optional[exp.Expression] = None 5140 prefix = self._match_text_seq("SYSUDTLIB", ".") 5141 5142 if not self._match_set(self.TYPE_TOKENS): 5143 identifier = allow_identifiers and self._parse_id_var( 5144 any_token=False, tokens=(TokenType.VAR,) 5145 ) 5146 if isinstance(identifier, exp.Identifier): 5147 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5148 5149 if len(tokens) != 1: 5150 self.raise_error("Unexpected identifier", self._prev) 5151 5152 if tokens[0].token_type in self.TYPE_TOKENS: 5153 self._prev = tokens[0] 5154 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5155 this = self._parse_user_defined_type(identifier) 5156 else: 5157 self._retreat(self._index - 1) 5158 return None 5159 else: 5160 return None 5161 5162 type_token = self._prev.token_type 5163 5164 if type_token == TokenType.PSEUDO_TYPE: 5165 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5166 5167 if type_token == TokenType.OBJECT_IDENTIFIER: 5168 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5169 5170 # https://materialize.com/docs/sql/types/map/ 5171 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5172 key_type = self._parse_types( 5173 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5174 ) 5175 if not self._match(TokenType.FARROW): 5176 self._retreat(index) 5177 return None 5178 5179 value_type = self._parse_types( 5180 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5181 ) 5182 if not self._match(TokenType.R_BRACKET): 5183 self._retreat(index) 5184 return None 5185 5186 return exp.DataType( 5187 this=exp.DataType.Type.MAP, 5188 expressions=[key_type, value_type], 5189 nested=True, 5190 prefix=prefix, 5191 ) 5192 5193 nested = type_token in self.NESTED_TYPE_TOKENS 5194 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5195 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5196 expressions = None 5197 maybe_func = False 5198 5199 if self._match(TokenType.L_PAREN): 5200 if is_struct: 5201 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5202 elif nested: 5203 expressions = self._parse_csv( 5204 lambda: self._parse_types( 5205 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5206 ) 5207 ) 5208 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5209 this = expressions[0] 5210 this.set("nullable", True) 5211 self._match_r_paren() 5212 return this 5213 elif type_token in self.ENUM_TYPE_TOKENS: 5214 expressions = self._parse_csv(self._parse_equality) 5215 elif is_aggregate: 5216 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5217 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5218 ) 5219 if not func_or_ident: 5220 return None 5221 expressions = [func_or_ident] 5222 if self._match(TokenType.COMMA): 5223 expressions.extend( 5224 self._parse_csv( 5225 lambda: self._parse_types( 5226 check_func=check_func, 5227 schema=schema, 5228 allow_identifiers=allow_identifiers, 5229 ) 5230 ) 5231 ) 5232 else: 5233 expressions = self._parse_csv(self._parse_type_size) 5234 5235 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5236 if type_token == TokenType.VECTOR and len(expressions) == 2: 5237 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5238 5239 if not expressions or not self._match(TokenType.R_PAREN): 5240 self._retreat(index) 5241 return None 5242 5243 maybe_func = True 5244 5245 values: t.Optional[t.List[exp.Expression]] = None 5246 5247 if nested and self._match(TokenType.LT): 5248 if is_struct: 5249 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5250 else: 5251 expressions = self._parse_csv( 5252 lambda: self._parse_types( 5253 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5254 ) 5255 ) 5256 5257 if not self._match(TokenType.GT): 5258 self.raise_error("Expecting >") 5259 5260 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5261 values = self._parse_csv(self._parse_assignment) 5262 if not values and is_struct: 5263 values = None 5264 self._retreat(self._index - 1) 5265 else: 5266 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5267 5268 if type_token in self.TIMESTAMPS: 5269 if self._match_text_seq("WITH", "TIME", "ZONE"): 5270 maybe_func = False 5271 tz_type = ( 5272 exp.DataType.Type.TIMETZ 5273 if type_token in self.TIMES 5274 else exp.DataType.Type.TIMESTAMPTZ 5275 ) 5276 this = exp.DataType(this=tz_type, expressions=expressions) 5277 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5278 maybe_func = False 5279 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5280 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5281 maybe_func = False 5282 elif type_token == TokenType.INTERVAL: 5283 unit = self._parse_var(upper=True) 5284 if unit: 5285 if self._match_text_seq("TO"): 5286 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5287 5288 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5289 else: 5290 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5291 elif type_token == TokenType.VOID: 5292 this = exp.DataType(this=exp.DataType.Type.NULL) 5293 5294 if maybe_func and check_func: 5295 index2 = self._index 5296 peek = self._parse_string() 5297 5298 if not peek: 5299 self._retreat(index) 5300 return None 5301 5302 self._retreat(index2) 5303 5304 if not this: 5305 if self._match_text_seq("UNSIGNED"): 5306 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5307 if not unsigned_type_token: 5308 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5309 5310 type_token = unsigned_type_token or type_token 5311 5312 this = exp.DataType( 5313 this=exp.DataType.Type[type_token.value], 5314 expressions=expressions, 5315 nested=nested, 5316 prefix=prefix, 5317 ) 5318 5319 # Empty arrays/structs are allowed 5320 if values is not None: 5321 cls = exp.Struct if is_struct else exp.Array 5322 this = exp.cast(cls(expressions=values), this, copy=False) 5323 5324 elif expressions: 5325 this.set("expressions", expressions) 5326 5327 # https://materialize.com/docs/sql/types/list/#type-name 5328 while self._match(TokenType.LIST): 5329 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5330 5331 index = self._index 5332 5333 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5334 matched_array = self._match(TokenType.ARRAY) 5335 5336 while self._curr: 5337 datatype_token = self._prev.token_type 5338 matched_l_bracket = self._match(TokenType.L_BRACKET) 5339 5340 if (not matched_l_bracket and not matched_array) or ( 5341 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5342 ): 5343 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5344 # not to be confused with the fixed size array parsing 5345 break 5346 5347 matched_array = False 5348 values = self._parse_csv(self._parse_assignment) or None 5349 if ( 5350 values 5351 and not schema 5352 and ( 5353 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5354 ) 5355 ): 5356 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5357 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5358 self._retreat(index) 5359 break 5360 5361 this = exp.DataType( 5362 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5363 ) 5364 self._match(TokenType.R_BRACKET) 5365 5366 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5367 converter = self.TYPE_CONVERTERS.get(this.this) 5368 if converter: 5369 this = converter(t.cast(exp.DataType, this)) 5370 5371 return this 5372 5373 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5374 index = self._index 5375 5376 if ( 5377 self._curr 5378 and self._next 5379 and self._curr.token_type in self.TYPE_TOKENS 5380 and self._next.token_type in self.TYPE_TOKENS 5381 ): 5382 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5383 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5384 this = self._parse_id_var() 5385 else: 5386 this = ( 5387 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5388 or self._parse_id_var() 5389 ) 5390 5391 self._match(TokenType.COLON) 5392 5393 if ( 5394 type_required 5395 and not isinstance(this, exp.DataType) 5396 and not self._match_set(self.TYPE_TOKENS, advance=False) 5397 ): 5398 self._retreat(index) 5399 return self._parse_types() 5400 5401 return self._parse_column_def(this) 5402 5403 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5404 if not self._match_text_seq("AT", "TIME", "ZONE"): 5405 return this 5406 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5407 5408 def _parse_column(self) -> t.Optional[exp.Expression]: 5409 this = self._parse_column_reference() 5410 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5411 5412 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5413 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5414 5415 return column 5416 5417 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5418 this = self._parse_field() 5419 if ( 5420 not this 5421 and self._match(TokenType.VALUES, advance=False) 5422 and self.VALUES_FOLLOWED_BY_PAREN 5423 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5424 ): 5425 this = self._parse_id_var() 5426 5427 if isinstance(this, exp.Identifier): 5428 # We bubble up comments from the Identifier to the Column 5429 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5430 5431 return this 5432 5433 def _parse_colon_as_variant_extract( 5434 self, this: t.Optional[exp.Expression] 5435 ) -> t.Optional[exp.Expression]: 5436 casts = [] 5437 json_path = [] 5438 escape = None 5439 5440 while self._match(TokenType.COLON): 5441 start_index = self._index 5442 5443 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5444 path = self._parse_column_ops( 5445 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5446 ) 5447 5448 # The cast :: operator has a lower precedence than the extraction operator :, so 5449 # we rearrange the AST appropriately to avoid casting the JSON path 5450 while isinstance(path, exp.Cast): 5451 casts.append(path.to) 5452 path = path.this 5453 5454 if casts: 5455 dcolon_offset = next( 5456 i 5457 for i, t in enumerate(self._tokens[start_index:]) 5458 if t.token_type == TokenType.DCOLON 5459 ) 5460 end_token = self._tokens[start_index + dcolon_offset - 1] 5461 else: 5462 end_token = self._prev 5463 5464 if path: 5465 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5466 # it'll roundtrip to a string literal in GET_PATH 5467 if isinstance(path, exp.Identifier) and path.quoted: 5468 escape = True 5469 5470 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5471 5472 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5473 # Databricks transforms it back to the colon/dot notation 5474 if json_path: 5475 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5476 5477 if json_path_expr: 5478 json_path_expr.set("escape", escape) 5479 5480 this = self.expression( 5481 exp.JSONExtract, 5482 this=this, 5483 expression=json_path_expr, 5484 variant_extract=True, 5485 ) 5486 5487 while casts: 5488 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5489 5490 return this 5491 5492 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5493 return self._parse_types() 5494 5495 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5496 this = self._parse_bracket(this) 5497 5498 while self._match_set(self.COLUMN_OPERATORS): 5499 op_token = self._prev.token_type 5500 op = self.COLUMN_OPERATORS.get(op_token) 5501 5502 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5503 field = self._parse_dcolon() 5504 if not field: 5505 self.raise_error("Expected type") 5506 elif op and self._curr: 5507 field = self._parse_column_reference() or self._parse_bracket() 5508 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5509 field = self._parse_column_ops(field) 5510 else: 5511 field = self._parse_field(any_token=True, anonymous_func=True) 5512 5513 # Function calls can be qualified, e.g., x.y.FOO() 5514 # This converts the final AST to a series of Dots leading to the function call 5515 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5516 if isinstance(field, (exp.Func, exp.Window)) and this: 5517 this = this.transform( 5518 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5519 ) 5520 5521 if op: 5522 this = op(self, this, field) 5523 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5524 this = self.expression( 5525 exp.Column, 5526 comments=this.comments, 5527 this=field, 5528 table=this.this, 5529 db=this.args.get("table"), 5530 catalog=this.args.get("db"), 5531 ) 5532 elif isinstance(field, exp.Window): 5533 # Move the exp.Dot's to the window's function 5534 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5535 field.set("this", window_func) 5536 this = field 5537 else: 5538 this = self.expression(exp.Dot, this=this, expression=field) 5539 5540 if field and field.comments: 5541 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5542 5543 this = self._parse_bracket(this) 5544 5545 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5546 5547 def _parse_primary(self) -> t.Optional[exp.Expression]: 5548 if self._match_set(self.PRIMARY_PARSERS): 5549 token_type = self._prev.token_type 5550 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5551 5552 if token_type == TokenType.STRING: 5553 expressions = [primary] 5554 while self._match(TokenType.STRING): 5555 expressions.append(exp.Literal.string(self._prev.text)) 5556 5557 if len(expressions) > 1: 5558 return self.expression(exp.Concat, expressions=expressions) 5559 5560 return primary 5561 5562 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5563 return exp.Literal.number(f"0.{self._prev.text}") 5564 5565 if self._match(TokenType.L_PAREN): 5566 comments = self._prev_comments 5567 query = self._parse_select() 5568 5569 if query: 5570 expressions = [query] 5571 else: 5572 expressions = self._parse_expressions() 5573 5574 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5575 5576 if not this and self._match(TokenType.R_PAREN, advance=False): 5577 this = self.expression(exp.Tuple) 5578 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5579 this = self._parse_subquery(this=this, parse_alias=False) 5580 elif isinstance(this, exp.Subquery): 5581 this = self._parse_subquery( 5582 this=self._parse_set_operations(this), parse_alias=False 5583 ) 5584 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5585 this = self.expression(exp.Tuple, expressions=expressions) 5586 else: 5587 this = self.expression(exp.Paren, this=this) 5588 5589 if this: 5590 this.add_comments(comments) 5591 5592 self._match_r_paren(expression=this) 5593 return this 5594 5595 return None 5596 5597 def _parse_field( 5598 self, 5599 any_token: bool = False, 5600 tokens: t.Optional[t.Collection[TokenType]] = None, 5601 anonymous_func: bool = False, 5602 ) -> t.Optional[exp.Expression]: 5603 if anonymous_func: 5604 field = ( 5605 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5606 or self._parse_primary() 5607 ) 5608 else: 5609 field = self._parse_primary() or self._parse_function( 5610 anonymous=anonymous_func, any_token=any_token 5611 ) 5612 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5613 5614 def _parse_function( 5615 self, 5616 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5617 anonymous: bool = False, 5618 optional_parens: bool = True, 5619 any_token: bool = False, 5620 ) -> t.Optional[exp.Expression]: 5621 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5622 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5623 fn_syntax = False 5624 if ( 5625 self._match(TokenType.L_BRACE, advance=False) 5626 and self._next 5627 and self._next.text.upper() == "FN" 5628 ): 5629 self._advance(2) 5630 fn_syntax = True 5631 5632 func = self._parse_function_call( 5633 functions=functions, 5634 anonymous=anonymous, 5635 optional_parens=optional_parens, 5636 any_token=any_token, 5637 ) 5638 5639 if fn_syntax: 5640 self._match(TokenType.R_BRACE) 5641 5642 return func 5643 5644 def _parse_function_call( 5645 self, 5646 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5647 anonymous: bool = False, 5648 optional_parens: bool = True, 5649 any_token: bool = False, 5650 ) -> t.Optional[exp.Expression]: 5651 if not self._curr: 5652 return None 5653 5654 comments = self._curr.comments 5655 token = self._curr 5656 token_type = self._curr.token_type 5657 this = self._curr.text 5658 upper = this.upper() 5659 5660 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5661 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5662 self._advance() 5663 return self._parse_window(parser(self)) 5664 5665 if not self._next or self._next.token_type != TokenType.L_PAREN: 5666 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5667 self._advance() 5668 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5669 5670 return None 5671 5672 if any_token: 5673 if token_type in self.RESERVED_TOKENS: 5674 return None 5675 elif token_type not in self.FUNC_TOKENS: 5676 return None 5677 5678 self._advance(2) 5679 5680 parser = self.FUNCTION_PARSERS.get(upper) 5681 if parser and not anonymous: 5682 this = parser(self) 5683 else: 5684 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5685 5686 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5687 this = self.expression( 5688 subquery_predicate, comments=comments, this=self._parse_select() 5689 ) 5690 self._match_r_paren() 5691 return this 5692 5693 if functions is None: 5694 functions = self.FUNCTIONS 5695 5696 function = functions.get(upper) 5697 known_function = function and not anonymous 5698 5699 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5700 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5701 5702 post_func_comments = self._curr and self._curr.comments 5703 if known_function and post_func_comments: 5704 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5705 # call we'll construct it as exp.Anonymous, even if it's "known" 5706 if any( 5707 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5708 for comment in post_func_comments 5709 ): 5710 known_function = False 5711 5712 if alias and known_function: 5713 args = self._kv_to_prop_eq(args) 5714 5715 if known_function: 5716 func_builder = t.cast(t.Callable, function) 5717 5718 if "dialect" in func_builder.__code__.co_varnames: 5719 func = func_builder(args, dialect=self.dialect) 5720 else: 5721 func = func_builder(args) 5722 5723 func = self.validate_expression(func, args) 5724 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5725 func.meta["name"] = this 5726 5727 this = func 5728 else: 5729 if token_type == TokenType.IDENTIFIER: 5730 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5731 5732 this = self.expression(exp.Anonymous, this=this, expressions=args) 5733 this = this.update_positions(token) 5734 5735 if isinstance(this, exp.Expression): 5736 this.add_comments(comments) 5737 5738 self._match_r_paren(this) 5739 return self._parse_window(this) 5740 5741 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5742 return expression 5743 5744 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5745 transformed = [] 5746 5747 for index, e in enumerate(expressions): 5748 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5749 if isinstance(e, exp.Alias): 5750 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5751 5752 if not isinstance(e, exp.PropertyEQ): 5753 e = self.expression( 5754 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5755 ) 5756 5757 if isinstance(e.this, exp.Column): 5758 e.this.replace(e.this.this) 5759 else: 5760 e = self._to_prop_eq(e, index) 5761 5762 transformed.append(e) 5763 5764 return transformed 5765 5766 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5767 return self._parse_statement() 5768 5769 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5770 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5771 5772 def _parse_user_defined_function( 5773 self, kind: t.Optional[TokenType] = None 5774 ) -> t.Optional[exp.Expression]: 5775 this = self._parse_table_parts(schema=True) 5776 5777 if not self._match(TokenType.L_PAREN): 5778 return this 5779 5780 expressions = self._parse_csv(self._parse_function_parameter) 5781 self._match_r_paren() 5782 return self.expression( 5783 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5784 ) 5785 5786 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5787 literal = self._parse_primary() 5788 if literal: 5789 return self.expression(exp.Introducer, this=token.text, expression=literal) 5790 5791 return self._identifier_expression(token) 5792 5793 def _parse_session_parameter(self) -> exp.SessionParameter: 5794 kind = None 5795 this = self._parse_id_var() or self._parse_primary() 5796 5797 if this and self._match(TokenType.DOT): 5798 kind = this.name 5799 this = self._parse_var() or self._parse_primary() 5800 5801 return self.expression(exp.SessionParameter, this=this, kind=kind) 5802 5803 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5804 return self._parse_id_var() 5805 5806 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5807 index = self._index 5808 5809 if self._match(TokenType.L_PAREN): 5810 expressions = t.cast( 5811 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5812 ) 5813 5814 if not self._match(TokenType.R_PAREN): 5815 self._retreat(index) 5816 else: 5817 expressions = [self._parse_lambda_arg()] 5818 5819 if self._match_set(self.LAMBDAS): 5820 return self.LAMBDAS[self._prev.token_type](self, expressions) 5821 5822 self._retreat(index) 5823 5824 this: t.Optional[exp.Expression] 5825 5826 if self._match(TokenType.DISTINCT): 5827 this = self.expression( 5828 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5829 ) 5830 else: 5831 this = self._parse_select_or_expression(alias=alias) 5832 5833 return self._parse_limit( 5834 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5835 ) 5836 5837 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5838 index = self._index 5839 if not self._match(TokenType.L_PAREN): 5840 return this 5841 5842 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5843 # expr can be of both types 5844 if self._match_set(self.SELECT_START_TOKENS): 5845 self._retreat(index) 5846 return this 5847 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5848 self._match_r_paren() 5849 return self.expression(exp.Schema, this=this, expressions=args) 5850 5851 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5852 return self._parse_column_def(self._parse_field(any_token=True)) 5853 5854 def _parse_column_def( 5855 self, this: t.Optional[exp.Expression], computed_column: bool = True 5856 ) -> t.Optional[exp.Expression]: 5857 # column defs are not really columns, they're identifiers 5858 if isinstance(this, exp.Column): 5859 this = this.this 5860 5861 if not computed_column: 5862 self._match(TokenType.ALIAS) 5863 5864 kind = self._parse_types(schema=True) 5865 5866 if self._match_text_seq("FOR", "ORDINALITY"): 5867 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5868 5869 constraints: t.List[exp.Expression] = [] 5870 5871 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5872 ("ALIAS", "MATERIALIZED") 5873 ): 5874 persisted = self._prev.text.upper() == "MATERIALIZED" 5875 constraint_kind = exp.ComputedColumnConstraint( 5876 this=self._parse_assignment(), 5877 persisted=persisted or self._match_text_seq("PERSISTED"), 5878 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5879 ) 5880 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5881 elif ( 5882 kind 5883 and self._match(TokenType.ALIAS, advance=False) 5884 and ( 5885 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5886 or (self._next and self._next.token_type == TokenType.L_PAREN) 5887 ) 5888 ): 5889 self._advance() 5890 constraints.append( 5891 self.expression( 5892 exp.ColumnConstraint, 5893 kind=exp.ComputedColumnConstraint( 5894 this=self._parse_disjunction(), 5895 persisted=self._match_texts(("STORED", "VIRTUAL")) 5896 and self._prev.text.upper() == "STORED", 5897 ), 5898 ) 5899 ) 5900 5901 while True: 5902 constraint = self._parse_column_constraint() 5903 if not constraint: 5904 break 5905 constraints.append(constraint) 5906 5907 if not kind and not constraints: 5908 return this 5909 5910 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5911 5912 def _parse_auto_increment( 5913 self, 5914 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5915 start = None 5916 increment = None 5917 5918 if self._match(TokenType.L_PAREN, advance=False): 5919 args = self._parse_wrapped_csv(self._parse_bitwise) 5920 start = seq_get(args, 0) 5921 increment = seq_get(args, 1) 5922 elif self._match_text_seq("START"): 5923 start = self._parse_bitwise() 5924 self._match_text_seq("INCREMENT") 5925 increment = self._parse_bitwise() 5926 5927 if start and increment: 5928 return exp.GeneratedAsIdentityColumnConstraint( 5929 start=start, increment=increment, this=False 5930 ) 5931 5932 return exp.AutoIncrementColumnConstraint() 5933 5934 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5935 if not self._match_text_seq("REFRESH"): 5936 self._retreat(self._index - 1) 5937 return None 5938 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5939 5940 def _parse_compress(self) -> exp.CompressColumnConstraint: 5941 if self._match(TokenType.L_PAREN, advance=False): 5942 return self.expression( 5943 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5944 ) 5945 5946 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5947 5948 def _parse_generated_as_identity( 5949 self, 5950 ) -> ( 5951 exp.GeneratedAsIdentityColumnConstraint 5952 | exp.ComputedColumnConstraint 5953 | exp.GeneratedAsRowColumnConstraint 5954 ): 5955 if self._match_text_seq("BY", "DEFAULT"): 5956 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5957 this = self.expression( 5958 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5959 ) 5960 else: 5961 self._match_text_seq("ALWAYS") 5962 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5963 5964 self._match(TokenType.ALIAS) 5965 5966 if self._match_text_seq("ROW"): 5967 start = self._match_text_seq("START") 5968 if not start: 5969 self._match(TokenType.END) 5970 hidden = self._match_text_seq("HIDDEN") 5971 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5972 5973 identity = self._match_text_seq("IDENTITY") 5974 5975 if self._match(TokenType.L_PAREN): 5976 if self._match(TokenType.START_WITH): 5977 this.set("start", self._parse_bitwise()) 5978 if self._match_text_seq("INCREMENT", "BY"): 5979 this.set("increment", self._parse_bitwise()) 5980 if self._match_text_seq("MINVALUE"): 5981 this.set("minvalue", self._parse_bitwise()) 5982 if self._match_text_seq("MAXVALUE"): 5983 this.set("maxvalue", self._parse_bitwise()) 5984 5985 if self._match_text_seq("CYCLE"): 5986 this.set("cycle", True) 5987 elif self._match_text_seq("NO", "CYCLE"): 5988 this.set("cycle", False) 5989 5990 if not identity: 5991 this.set("expression", self._parse_range()) 5992 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5993 args = self._parse_csv(self._parse_bitwise) 5994 this.set("start", seq_get(args, 0)) 5995 this.set("increment", seq_get(args, 1)) 5996 5997 self._match_r_paren() 5998 5999 return this 6000 6001 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6002 self._match_text_seq("LENGTH") 6003 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6004 6005 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6006 if self._match_text_seq("NULL"): 6007 return self.expression(exp.NotNullColumnConstraint) 6008 if self._match_text_seq("CASESPECIFIC"): 6009 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6010 if self._match_text_seq("FOR", "REPLICATION"): 6011 return self.expression(exp.NotForReplicationColumnConstraint) 6012 6013 # Unconsume the `NOT` token 6014 self._retreat(self._index - 1) 6015 return None 6016 6017 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6018 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6019 6020 procedure_option_follows = ( 6021 self._match(TokenType.WITH, advance=False) 6022 and self._next 6023 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6024 ) 6025 6026 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6027 return self.expression( 6028 exp.ColumnConstraint, 6029 this=this, 6030 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6031 ) 6032 6033 return this 6034 6035 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6036 if not self._match(TokenType.CONSTRAINT): 6037 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6038 6039 return self.expression( 6040 exp.Constraint, 6041 this=self._parse_id_var(), 6042 expressions=self._parse_unnamed_constraints(), 6043 ) 6044 6045 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6046 constraints = [] 6047 while True: 6048 constraint = self._parse_unnamed_constraint() or self._parse_function() 6049 if not constraint: 6050 break 6051 constraints.append(constraint) 6052 6053 return constraints 6054 6055 def _parse_unnamed_constraint( 6056 self, constraints: t.Optional[t.Collection[str]] = None 6057 ) -> t.Optional[exp.Expression]: 6058 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6059 constraints or self.CONSTRAINT_PARSERS 6060 ): 6061 return None 6062 6063 constraint = self._prev.text.upper() 6064 if constraint not in self.CONSTRAINT_PARSERS: 6065 self.raise_error(f"No parser found for schema constraint {constraint}.") 6066 6067 return self.CONSTRAINT_PARSERS[constraint](self) 6068 6069 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6070 return self._parse_id_var(any_token=False) 6071 6072 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6073 self._match_text_seq("KEY") 6074 return self.expression( 6075 exp.UniqueColumnConstraint, 6076 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6077 this=self._parse_schema(self._parse_unique_key()), 6078 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6079 on_conflict=self._parse_on_conflict(), 6080 options=self._parse_key_constraint_options(), 6081 ) 6082 6083 def _parse_key_constraint_options(self) -> t.List[str]: 6084 options = [] 6085 while True: 6086 if not self._curr: 6087 break 6088 6089 if self._match(TokenType.ON): 6090 action = None 6091 on = self._advance_any() and self._prev.text 6092 6093 if self._match_text_seq("NO", "ACTION"): 6094 action = "NO ACTION" 6095 elif self._match_text_seq("CASCADE"): 6096 action = "CASCADE" 6097 elif self._match_text_seq("RESTRICT"): 6098 action = "RESTRICT" 6099 elif self._match_pair(TokenType.SET, TokenType.NULL): 6100 action = "SET NULL" 6101 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6102 action = "SET DEFAULT" 6103 else: 6104 self.raise_error("Invalid key constraint") 6105 6106 options.append(f"ON {on} {action}") 6107 else: 6108 var = self._parse_var_from_options( 6109 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6110 ) 6111 if not var: 6112 break 6113 options.append(var.name) 6114 6115 return options 6116 6117 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6118 if match and not self._match(TokenType.REFERENCES): 6119 return None 6120 6121 expressions = None 6122 this = self._parse_table(schema=True) 6123 options = self._parse_key_constraint_options() 6124 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6125 6126 def _parse_foreign_key(self) -> exp.ForeignKey: 6127 expressions = ( 6128 self._parse_wrapped_id_vars() 6129 if not self._match(TokenType.REFERENCES, advance=False) 6130 else None 6131 ) 6132 reference = self._parse_references() 6133 on_options = {} 6134 6135 while self._match(TokenType.ON): 6136 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6137 self.raise_error("Expected DELETE or UPDATE") 6138 6139 kind = self._prev.text.lower() 6140 6141 if self._match_text_seq("NO", "ACTION"): 6142 action = "NO ACTION" 6143 elif self._match(TokenType.SET): 6144 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6145 action = "SET " + self._prev.text.upper() 6146 else: 6147 self._advance() 6148 action = self._prev.text.upper() 6149 6150 on_options[kind] = action 6151 6152 return self.expression( 6153 exp.ForeignKey, 6154 expressions=expressions, 6155 reference=reference, 6156 options=self._parse_key_constraint_options(), 6157 **on_options, # type: ignore 6158 ) 6159 6160 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6161 return self._parse_ordered() or self._parse_field() 6162 6163 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6164 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6165 self._retreat(self._index - 1) 6166 return None 6167 6168 id_vars = self._parse_wrapped_id_vars() 6169 return self.expression( 6170 exp.PeriodForSystemTimeConstraint, 6171 this=seq_get(id_vars, 0), 6172 expression=seq_get(id_vars, 1), 6173 ) 6174 6175 def _parse_primary_key( 6176 self, wrapped_optional: bool = False, in_props: bool = False 6177 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6178 desc = ( 6179 self._match_set((TokenType.ASC, TokenType.DESC)) 6180 and self._prev.token_type == TokenType.DESC 6181 ) 6182 6183 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6184 return self.expression( 6185 exp.PrimaryKeyColumnConstraint, 6186 desc=desc, 6187 options=self._parse_key_constraint_options(), 6188 ) 6189 6190 expressions = self._parse_wrapped_csv( 6191 self._parse_primary_key_part, optional=wrapped_optional 6192 ) 6193 options = self._parse_key_constraint_options() 6194 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6195 6196 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6197 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6198 6199 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6200 """ 6201 Parses a datetime column in ODBC format. We parse the column into the corresponding 6202 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6203 same as we did for `DATE('yyyy-mm-dd')`. 6204 6205 Reference: 6206 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6207 """ 6208 self._match(TokenType.VAR) 6209 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6210 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6211 if not self._match(TokenType.R_BRACE): 6212 self.raise_error("Expected }") 6213 return expression 6214 6215 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6216 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6217 return this 6218 6219 bracket_kind = self._prev.token_type 6220 if ( 6221 bracket_kind == TokenType.L_BRACE 6222 and self._curr 6223 and self._curr.token_type == TokenType.VAR 6224 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6225 ): 6226 return self._parse_odbc_datetime_literal() 6227 6228 expressions = self._parse_csv( 6229 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6230 ) 6231 6232 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6233 self.raise_error("Expected ]") 6234 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6235 self.raise_error("Expected }") 6236 6237 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6238 if bracket_kind == TokenType.L_BRACE: 6239 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6240 elif not this: 6241 this = build_array_constructor( 6242 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6243 ) 6244 else: 6245 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6246 if constructor_type: 6247 return build_array_constructor( 6248 constructor_type, 6249 args=expressions, 6250 bracket_kind=bracket_kind, 6251 dialect=self.dialect, 6252 ) 6253 6254 expressions = apply_index_offset( 6255 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6256 ) 6257 this = self.expression( 6258 exp.Bracket, 6259 this=this, 6260 expressions=expressions, 6261 comments=this.pop_comments(), 6262 ) 6263 6264 self._add_comments(this) 6265 return self._parse_bracket(this) 6266 6267 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6268 if self._match(TokenType.COLON): 6269 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6270 return this 6271 6272 def _parse_case(self) -> t.Optional[exp.Expression]: 6273 ifs = [] 6274 default = None 6275 6276 comments = self._prev_comments 6277 expression = self._parse_assignment() 6278 6279 while self._match(TokenType.WHEN): 6280 this = self._parse_assignment() 6281 self._match(TokenType.THEN) 6282 then = self._parse_assignment() 6283 ifs.append(self.expression(exp.If, this=this, true=then)) 6284 6285 if self._match(TokenType.ELSE): 6286 default = self._parse_assignment() 6287 6288 if not self._match(TokenType.END): 6289 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6290 default = exp.column("interval") 6291 else: 6292 self.raise_error("Expected END after CASE", self._prev) 6293 6294 return self.expression( 6295 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6296 ) 6297 6298 def _parse_if(self) -> t.Optional[exp.Expression]: 6299 if self._match(TokenType.L_PAREN): 6300 args = self._parse_csv( 6301 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6302 ) 6303 this = self.validate_expression(exp.If.from_arg_list(args), args) 6304 self._match_r_paren() 6305 else: 6306 index = self._index - 1 6307 6308 if self.NO_PAREN_IF_COMMANDS and index == 0: 6309 return self._parse_as_command(self._prev) 6310 6311 condition = self._parse_assignment() 6312 6313 if not condition: 6314 self._retreat(index) 6315 return None 6316 6317 self._match(TokenType.THEN) 6318 true = self._parse_assignment() 6319 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6320 self._match(TokenType.END) 6321 this = self.expression(exp.If, this=condition, true=true, false=false) 6322 6323 return this 6324 6325 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6326 if not self._match_text_seq("VALUE", "FOR"): 6327 self._retreat(self._index - 1) 6328 return None 6329 6330 return self.expression( 6331 exp.NextValueFor, 6332 this=self._parse_column(), 6333 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6334 ) 6335 6336 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6337 this = self._parse_function() or self._parse_var_or_string(upper=True) 6338 6339 if self._match(TokenType.FROM): 6340 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6341 6342 if not self._match(TokenType.COMMA): 6343 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6344 6345 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6346 6347 def _parse_gap_fill(self) -> exp.GapFill: 6348 self._match(TokenType.TABLE) 6349 this = self._parse_table() 6350 6351 self._match(TokenType.COMMA) 6352 args = [this, *self._parse_csv(self._parse_lambda)] 6353 6354 gap_fill = exp.GapFill.from_arg_list(args) 6355 return self.validate_expression(gap_fill, args) 6356 6357 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6358 this = self._parse_assignment() 6359 6360 if not self._match(TokenType.ALIAS): 6361 if self._match(TokenType.COMMA): 6362 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6363 6364 self.raise_error("Expected AS after CAST") 6365 6366 fmt = None 6367 to = self._parse_types() 6368 6369 default = self._match(TokenType.DEFAULT) 6370 if default: 6371 default = self._parse_bitwise() 6372 self._match_text_seq("ON", "CONVERSION", "ERROR") 6373 6374 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6375 fmt_string = self._parse_string() 6376 fmt = self._parse_at_time_zone(fmt_string) 6377 6378 if not to: 6379 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6380 if to.this in exp.DataType.TEMPORAL_TYPES: 6381 this = self.expression( 6382 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6383 this=this, 6384 format=exp.Literal.string( 6385 format_time( 6386 fmt_string.this if fmt_string else "", 6387 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6388 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6389 ) 6390 ), 6391 safe=safe, 6392 ) 6393 6394 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6395 this.set("zone", fmt.args["zone"]) 6396 return this 6397 elif not to: 6398 self.raise_error("Expected TYPE after CAST") 6399 elif isinstance(to, exp.Identifier): 6400 to = exp.DataType.build(to.name, udt=True) 6401 elif to.this == exp.DataType.Type.CHAR: 6402 if self._match(TokenType.CHARACTER_SET): 6403 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6404 6405 return self.expression( 6406 exp.Cast if strict else exp.TryCast, 6407 this=this, 6408 to=to, 6409 format=fmt, 6410 safe=safe, 6411 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6412 default=default, 6413 ) 6414 6415 def _parse_string_agg(self) -> exp.GroupConcat: 6416 if self._match(TokenType.DISTINCT): 6417 args: t.List[t.Optional[exp.Expression]] = [ 6418 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6419 ] 6420 if self._match(TokenType.COMMA): 6421 args.extend(self._parse_csv(self._parse_assignment)) 6422 else: 6423 args = self._parse_csv(self._parse_assignment) # type: ignore 6424 6425 if self._match_text_seq("ON", "OVERFLOW"): 6426 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6427 if self._match_text_seq("ERROR"): 6428 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6429 else: 6430 self._match_text_seq("TRUNCATE") 6431 on_overflow = self.expression( 6432 exp.OverflowTruncateBehavior, 6433 this=self._parse_string(), 6434 with_count=( 6435 self._match_text_seq("WITH", "COUNT") 6436 or not self._match_text_seq("WITHOUT", "COUNT") 6437 ), 6438 ) 6439 else: 6440 on_overflow = None 6441 6442 index = self._index 6443 if not self._match(TokenType.R_PAREN) and args: 6444 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6445 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6446 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6447 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6448 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6449 6450 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6451 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6452 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6453 if not self._match_text_seq("WITHIN", "GROUP"): 6454 self._retreat(index) 6455 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6456 6457 # The corresponding match_r_paren will be called in parse_function (caller) 6458 self._match_l_paren() 6459 6460 return self.expression( 6461 exp.GroupConcat, 6462 this=self._parse_order(this=seq_get(args, 0)), 6463 separator=seq_get(args, 1), 6464 on_overflow=on_overflow, 6465 ) 6466 6467 def _parse_convert( 6468 self, strict: bool, safe: t.Optional[bool] = None 6469 ) -> t.Optional[exp.Expression]: 6470 this = self._parse_bitwise() 6471 6472 if self._match(TokenType.USING): 6473 to: t.Optional[exp.Expression] = self.expression( 6474 exp.CharacterSet, this=self._parse_var() 6475 ) 6476 elif self._match(TokenType.COMMA): 6477 to = self._parse_types() 6478 else: 6479 to = None 6480 6481 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6482 6483 def _parse_xml_table(self) -> exp.XMLTable: 6484 namespaces = None 6485 passing = None 6486 columns = None 6487 6488 if self._match_text_seq("XMLNAMESPACES", "("): 6489 namespaces = self._parse_xml_namespace() 6490 self._match_text_seq(")", ",") 6491 6492 this = self._parse_string() 6493 6494 if self._match_text_seq("PASSING"): 6495 # The BY VALUE keywords are optional and are provided for semantic clarity 6496 self._match_text_seq("BY", "VALUE") 6497 passing = self._parse_csv(self._parse_column) 6498 6499 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6500 6501 if self._match_text_seq("COLUMNS"): 6502 columns = self._parse_csv(self._parse_field_def) 6503 6504 return self.expression( 6505 exp.XMLTable, 6506 this=this, 6507 namespaces=namespaces, 6508 passing=passing, 6509 columns=columns, 6510 by_ref=by_ref, 6511 ) 6512 6513 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6514 namespaces = [] 6515 6516 while True: 6517 if self._match(TokenType.DEFAULT): 6518 uri = self._parse_string() 6519 else: 6520 uri = self._parse_alias(self._parse_string()) 6521 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6522 if not self._match(TokenType.COMMA): 6523 break 6524 6525 return namespaces 6526 6527 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6528 """ 6529 There are generally two variants of the DECODE function: 6530 6531 - DECODE(bin, charset) 6532 - DECODE(expression, search, result [, search, result] ... [, default]) 6533 6534 The second variant will always be parsed into a CASE expression. Note that NULL 6535 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6536 instead of relying on pattern matching. 6537 """ 6538 args = self._parse_csv(self._parse_assignment) 6539 6540 if len(args) < 3: 6541 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6542 6543 expression, *expressions = args 6544 if not expression: 6545 return None 6546 6547 ifs = [] 6548 for search, result in zip(expressions[::2], expressions[1::2]): 6549 if not search or not result: 6550 return None 6551 6552 if isinstance(search, exp.Literal): 6553 ifs.append( 6554 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6555 ) 6556 elif isinstance(search, exp.Null): 6557 ifs.append( 6558 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6559 ) 6560 else: 6561 cond = exp.or_( 6562 exp.EQ(this=expression.copy(), expression=search), 6563 exp.and_( 6564 exp.Is(this=expression.copy(), expression=exp.Null()), 6565 exp.Is(this=search.copy(), expression=exp.Null()), 6566 copy=False, 6567 ), 6568 copy=False, 6569 ) 6570 ifs.append(exp.If(this=cond, true=result)) 6571 6572 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6573 6574 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6575 self._match_text_seq("KEY") 6576 key = self._parse_column() 6577 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6578 self._match_text_seq("VALUE") 6579 value = self._parse_bitwise() 6580 6581 if not key and not value: 6582 return None 6583 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6584 6585 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6586 if not this or not self._match_text_seq("FORMAT", "JSON"): 6587 return this 6588 6589 return self.expression(exp.FormatJson, this=this) 6590 6591 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6592 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6593 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6594 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6595 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6596 else: 6597 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6598 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6599 6600 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6601 6602 if not empty and not error and not null: 6603 return None 6604 6605 return self.expression( 6606 exp.OnCondition, 6607 empty=empty, 6608 error=error, 6609 null=null, 6610 ) 6611 6612 def _parse_on_handling( 6613 self, on: str, *values: str 6614 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6615 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6616 for value in values: 6617 if self._match_text_seq(value, "ON", on): 6618 return f"{value} ON {on}" 6619 6620 index = self._index 6621 if self._match(TokenType.DEFAULT): 6622 default_value = self._parse_bitwise() 6623 if self._match_text_seq("ON", on): 6624 return default_value 6625 6626 self._retreat(index) 6627 6628 return None 6629 6630 @t.overload 6631 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6632 6633 @t.overload 6634 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6635 6636 def _parse_json_object(self, agg=False): 6637 star = self._parse_star() 6638 expressions = ( 6639 [star] 6640 if star 6641 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6642 ) 6643 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6644 6645 unique_keys = None 6646 if self._match_text_seq("WITH", "UNIQUE"): 6647 unique_keys = True 6648 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6649 unique_keys = False 6650 6651 self._match_text_seq("KEYS") 6652 6653 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6654 self._parse_type() 6655 ) 6656 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6657 6658 return self.expression( 6659 exp.JSONObjectAgg if agg else exp.JSONObject, 6660 expressions=expressions, 6661 null_handling=null_handling, 6662 unique_keys=unique_keys, 6663 return_type=return_type, 6664 encoding=encoding, 6665 ) 6666 6667 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6668 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6669 if not self._match_text_seq("NESTED"): 6670 this = self._parse_id_var() 6671 kind = self._parse_types(allow_identifiers=False) 6672 nested = None 6673 else: 6674 this = None 6675 kind = None 6676 nested = True 6677 6678 path = self._match_text_seq("PATH") and self._parse_string() 6679 nested_schema = nested and self._parse_json_schema() 6680 6681 return self.expression( 6682 exp.JSONColumnDef, 6683 this=this, 6684 kind=kind, 6685 path=path, 6686 nested_schema=nested_schema, 6687 ) 6688 6689 def _parse_json_schema(self) -> exp.JSONSchema: 6690 self._match_text_seq("COLUMNS") 6691 return self.expression( 6692 exp.JSONSchema, 6693 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6694 ) 6695 6696 def _parse_json_table(self) -> exp.JSONTable: 6697 this = self._parse_format_json(self._parse_bitwise()) 6698 path = self._match(TokenType.COMMA) and self._parse_string() 6699 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6700 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6701 schema = self._parse_json_schema() 6702 6703 return exp.JSONTable( 6704 this=this, 6705 schema=schema, 6706 path=path, 6707 error_handling=error_handling, 6708 empty_handling=empty_handling, 6709 ) 6710 6711 def _parse_match_against(self) -> exp.MatchAgainst: 6712 expressions = self._parse_csv(self._parse_column) 6713 6714 self._match_text_seq(")", "AGAINST", "(") 6715 6716 this = self._parse_string() 6717 6718 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6719 modifier = "IN NATURAL LANGUAGE MODE" 6720 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6721 modifier = f"{modifier} WITH QUERY EXPANSION" 6722 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6723 modifier = "IN BOOLEAN MODE" 6724 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6725 modifier = "WITH QUERY EXPANSION" 6726 else: 6727 modifier = None 6728 6729 return self.expression( 6730 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6731 ) 6732 6733 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6734 def _parse_open_json(self) -> exp.OpenJSON: 6735 this = self._parse_bitwise() 6736 path = self._match(TokenType.COMMA) and self._parse_string() 6737 6738 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6739 this = self._parse_field(any_token=True) 6740 kind = self._parse_types() 6741 path = self._parse_string() 6742 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6743 6744 return self.expression( 6745 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6746 ) 6747 6748 expressions = None 6749 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6750 self._match_l_paren() 6751 expressions = self._parse_csv(_parse_open_json_column_def) 6752 6753 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6754 6755 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6756 args = self._parse_csv(self._parse_bitwise) 6757 6758 if self._match(TokenType.IN): 6759 return self.expression( 6760 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6761 ) 6762 6763 if haystack_first: 6764 haystack = seq_get(args, 0) 6765 needle = seq_get(args, 1) 6766 else: 6767 haystack = seq_get(args, 1) 6768 needle = seq_get(args, 0) 6769 6770 return self.expression( 6771 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6772 ) 6773 6774 def _parse_predict(self) -> exp.Predict: 6775 self._match_text_seq("MODEL") 6776 this = self._parse_table() 6777 6778 self._match(TokenType.COMMA) 6779 self._match_text_seq("TABLE") 6780 6781 return self.expression( 6782 exp.Predict, 6783 this=this, 6784 expression=self._parse_table(), 6785 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6786 ) 6787 6788 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6789 args = self._parse_csv(self._parse_table) 6790 return exp.JoinHint(this=func_name.upper(), expressions=args) 6791 6792 def _parse_substring(self) -> exp.Substring: 6793 # Postgres supports the form: substring(string [from int] [for int]) 6794 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6795 6796 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6797 6798 if self._match(TokenType.FROM): 6799 args.append(self._parse_bitwise()) 6800 if self._match(TokenType.FOR): 6801 if len(args) == 1: 6802 args.append(exp.Literal.number(1)) 6803 args.append(self._parse_bitwise()) 6804 6805 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6806 6807 def _parse_trim(self) -> exp.Trim: 6808 # https://www.w3resource.com/sql/character-functions/trim.php 6809 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6810 6811 position = None 6812 collation = None 6813 expression = None 6814 6815 if self._match_texts(self.TRIM_TYPES): 6816 position = self._prev.text.upper() 6817 6818 this = self._parse_bitwise() 6819 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6820 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6821 expression = self._parse_bitwise() 6822 6823 if invert_order: 6824 this, expression = expression, this 6825 6826 if self._match(TokenType.COLLATE): 6827 collation = self._parse_bitwise() 6828 6829 return self.expression( 6830 exp.Trim, this=this, position=position, expression=expression, collation=collation 6831 ) 6832 6833 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6834 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6835 6836 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6837 return self._parse_window(self._parse_id_var(), alias=True) 6838 6839 def _parse_respect_or_ignore_nulls( 6840 self, this: t.Optional[exp.Expression] 6841 ) -> t.Optional[exp.Expression]: 6842 if self._match_text_seq("IGNORE", "NULLS"): 6843 return self.expression(exp.IgnoreNulls, this=this) 6844 if self._match_text_seq("RESPECT", "NULLS"): 6845 return self.expression(exp.RespectNulls, this=this) 6846 return this 6847 6848 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6849 if self._match(TokenType.HAVING): 6850 self._match_texts(("MAX", "MIN")) 6851 max = self._prev.text.upper() != "MIN" 6852 return self.expression( 6853 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6854 ) 6855 6856 return this 6857 6858 def _parse_window( 6859 self, this: t.Optional[exp.Expression], alias: bool = False 6860 ) -> t.Optional[exp.Expression]: 6861 func = this 6862 comments = func.comments if isinstance(func, exp.Expression) else None 6863 6864 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6865 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6866 if self._match_text_seq("WITHIN", "GROUP"): 6867 order = self._parse_wrapped(self._parse_order) 6868 this = self.expression(exp.WithinGroup, this=this, expression=order) 6869 6870 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6871 self._match(TokenType.WHERE) 6872 this = self.expression( 6873 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6874 ) 6875 self._match_r_paren() 6876 6877 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6878 # Some dialects choose to implement and some do not. 6879 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6880 6881 # There is some code above in _parse_lambda that handles 6882 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6883 6884 # The below changes handle 6885 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6886 6887 # Oracle allows both formats 6888 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6889 # and Snowflake chose to do the same for familiarity 6890 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6891 if isinstance(this, exp.AggFunc): 6892 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6893 6894 if ignore_respect and ignore_respect is not this: 6895 ignore_respect.replace(ignore_respect.this) 6896 this = self.expression(ignore_respect.__class__, this=this) 6897 6898 this = self._parse_respect_or_ignore_nulls(this) 6899 6900 # bigquery select from window x AS (partition by ...) 6901 if alias: 6902 over = None 6903 self._match(TokenType.ALIAS) 6904 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6905 return this 6906 else: 6907 over = self._prev.text.upper() 6908 6909 if comments and isinstance(func, exp.Expression): 6910 func.pop_comments() 6911 6912 if not self._match(TokenType.L_PAREN): 6913 return self.expression( 6914 exp.Window, 6915 comments=comments, 6916 this=this, 6917 alias=self._parse_id_var(False), 6918 over=over, 6919 ) 6920 6921 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6922 6923 first = self._match(TokenType.FIRST) 6924 if self._match_text_seq("LAST"): 6925 first = False 6926 6927 partition, order = self._parse_partition_and_order() 6928 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6929 6930 if kind: 6931 self._match(TokenType.BETWEEN) 6932 start = self._parse_window_spec() 6933 self._match(TokenType.AND) 6934 end = self._parse_window_spec() 6935 exclude = ( 6936 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 6937 if self._match_text_seq("EXCLUDE") 6938 else None 6939 ) 6940 6941 spec = self.expression( 6942 exp.WindowSpec, 6943 kind=kind, 6944 start=start["value"], 6945 start_side=start["side"], 6946 end=end["value"], 6947 end_side=end["side"], 6948 exclude=exclude, 6949 ) 6950 else: 6951 spec = None 6952 6953 self._match_r_paren() 6954 6955 window = self.expression( 6956 exp.Window, 6957 comments=comments, 6958 this=this, 6959 partition_by=partition, 6960 order=order, 6961 spec=spec, 6962 alias=window_alias, 6963 over=over, 6964 first=first, 6965 ) 6966 6967 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6968 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6969 return self._parse_window(window, alias=alias) 6970 6971 return window 6972 6973 def _parse_partition_and_order( 6974 self, 6975 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6976 return self._parse_partition_by(), self._parse_order() 6977 6978 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6979 self._match(TokenType.BETWEEN) 6980 6981 return { 6982 "value": ( 6983 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6984 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6985 or self._parse_bitwise() 6986 ), 6987 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6988 } 6989 6990 def _parse_alias( 6991 self, this: t.Optional[exp.Expression], explicit: bool = False 6992 ) -> t.Optional[exp.Expression]: 6993 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6994 # so this section tries to parse the clause version and if it fails, it treats the token 6995 # as an identifier (alias) 6996 if self._can_parse_limit_or_offset(): 6997 return this 6998 6999 any_token = self._match(TokenType.ALIAS) 7000 comments = self._prev_comments or [] 7001 7002 if explicit and not any_token: 7003 return this 7004 7005 if self._match(TokenType.L_PAREN): 7006 aliases = self.expression( 7007 exp.Aliases, 7008 comments=comments, 7009 this=this, 7010 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7011 ) 7012 self._match_r_paren(aliases) 7013 return aliases 7014 7015 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7016 self.STRING_ALIASES and self._parse_string_as_identifier() 7017 ) 7018 7019 if alias: 7020 comments.extend(alias.pop_comments()) 7021 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7022 column = this.this 7023 7024 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7025 if not this.comments and column and column.comments: 7026 this.comments = column.pop_comments() 7027 7028 return this 7029 7030 def _parse_id_var( 7031 self, 7032 any_token: bool = True, 7033 tokens: t.Optional[t.Collection[TokenType]] = None, 7034 ) -> t.Optional[exp.Expression]: 7035 expression = self._parse_identifier() 7036 if not expression and ( 7037 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7038 ): 7039 quoted = self._prev.token_type == TokenType.STRING 7040 expression = self._identifier_expression(quoted=quoted) 7041 7042 return expression 7043 7044 def _parse_string(self) -> t.Optional[exp.Expression]: 7045 if self._match_set(self.STRING_PARSERS): 7046 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7047 return self._parse_placeholder() 7048 7049 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7050 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7051 if output: 7052 output.update_positions(self._prev) 7053 return output 7054 7055 def _parse_number(self) -> t.Optional[exp.Expression]: 7056 if self._match_set(self.NUMERIC_PARSERS): 7057 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7058 return self._parse_placeholder() 7059 7060 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7061 if self._match(TokenType.IDENTIFIER): 7062 return self._identifier_expression(quoted=True) 7063 return self._parse_placeholder() 7064 7065 def _parse_var( 7066 self, 7067 any_token: bool = False, 7068 tokens: t.Optional[t.Collection[TokenType]] = None, 7069 upper: bool = False, 7070 ) -> t.Optional[exp.Expression]: 7071 if ( 7072 (any_token and self._advance_any()) 7073 or self._match(TokenType.VAR) 7074 or (self._match_set(tokens) if tokens else False) 7075 ): 7076 return self.expression( 7077 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7078 ) 7079 return self._parse_placeholder() 7080 7081 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7082 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7083 self._advance() 7084 return self._prev 7085 return None 7086 7087 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7088 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7089 7090 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7091 return self._parse_primary() or self._parse_var(any_token=True) 7092 7093 def _parse_null(self) -> t.Optional[exp.Expression]: 7094 if self._match_set(self.NULL_TOKENS): 7095 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7096 return self._parse_placeholder() 7097 7098 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7099 if self._match(TokenType.TRUE): 7100 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7101 if self._match(TokenType.FALSE): 7102 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7103 return self._parse_placeholder() 7104 7105 def _parse_star(self) -> t.Optional[exp.Expression]: 7106 if self._match(TokenType.STAR): 7107 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7108 return self._parse_placeholder() 7109 7110 def _parse_parameter(self) -> exp.Parameter: 7111 this = self._parse_identifier() or self._parse_primary_or_var() 7112 return self.expression(exp.Parameter, this=this) 7113 7114 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7115 if self._match_set(self.PLACEHOLDER_PARSERS): 7116 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7117 if placeholder: 7118 return placeholder 7119 self._advance(-1) 7120 return None 7121 7122 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7123 if not self._match_texts(keywords): 7124 return None 7125 if self._match(TokenType.L_PAREN, advance=False): 7126 return self._parse_wrapped_csv(self._parse_expression) 7127 7128 expression = self._parse_expression() 7129 return [expression] if expression else None 7130 7131 def _parse_csv( 7132 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7133 ) -> t.List[exp.Expression]: 7134 parse_result = parse_method() 7135 items = [parse_result] if parse_result is not None else [] 7136 7137 while self._match(sep): 7138 self._add_comments(parse_result) 7139 parse_result = parse_method() 7140 if parse_result is not None: 7141 items.append(parse_result) 7142 7143 return items 7144 7145 def _parse_tokens( 7146 self, parse_method: t.Callable, expressions: t.Dict 7147 ) -> t.Optional[exp.Expression]: 7148 this = parse_method() 7149 7150 while self._match_set(expressions): 7151 this = self.expression( 7152 expressions[self._prev.token_type], 7153 this=this, 7154 comments=self._prev_comments, 7155 expression=parse_method(), 7156 ) 7157 7158 return this 7159 7160 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7161 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7162 7163 def _parse_wrapped_csv( 7164 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7165 ) -> t.List[exp.Expression]: 7166 return self._parse_wrapped( 7167 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7168 ) 7169 7170 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7171 wrapped = self._match(TokenType.L_PAREN) 7172 if not wrapped and not optional: 7173 self.raise_error("Expecting (") 7174 parse_result = parse_method() 7175 if wrapped: 7176 self._match_r_paren() 7177 return parse_result 7178 7179 def _parse_expressions(self) -> t.List[exp.Expression]: 7180 return self._parse_csv(self._parse_expression) 7181 7182 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7183 return self._parse_select() or self._parse_set_operations( 7184 self._parse_alias(self._parse_assignment(), explicit=True) 7185 if alias 7186 else self._parse_assignment() 7187 ) 7188 7189 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7190 return self._parse_query_modifiers( 7191 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7192 ) 7193 7194 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7195 this = None 7196 if self._match_texts(self.TRANSACTION_KIND): 7197 this = self._prev.text 7198 7199 self._match_texts(("TRANSACTION", "WORK")) 7200 7201 modes = [] 7202 while True: 7203 mode = [] 7204 while self._match(TokenType.VAR): 7205 mode.append(self._prev.text) 7206 7207 if mode: 7208 modes.append(" ".join(mode)) 7209 if not self._match(TokenType.COMMA): 7210 break 7211 7212 return self.expression(exp.Transaction, this=this, modes=modes) 7213 7214 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7215 chain = None 7216 savepoint = None 7217 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7218 7219 self._match_texts(("TRANSACTION", "WORK")) 7220 7221 if self._match_text_seq("TO"): 7222 self._match_text_seq("SAVEPOINT") 7223 savepoint = self._parse_id_var() 7224 7225 if self._match(TokenType.AND): 7226 chain = not self._match_text_seq("NO") 7227 self._match_text_seq("CHAIN") 7228 7229 if is_rollback: 7230 return self.expression(exp.Rollback, savepoint=savepoint) 7231 7232 return self.expression(exp.Commit, chain=chain) 7233 7234 def _parse_refresh(self) -> exp.Refresh: 7235 self._match(TokenType.TABLE) 7236 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7237 7238 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7239 if not self._prev.text.upper() == "ADD": 7240 return None 7241 7242 start = self._index 7243 self._match(TokenType.COLUMN) 7244 7245 exists_column = self._parse_exists(not_=True) 7246 expression = self._parse_field_def() 7247 7248 if not isinstance(expression, exp.ColumnDef): 7249 self._retreat(start) 7250 return None 7251 7252 expression.set("exists", exists_column) 7253 7254 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7255 if self._match_texts(("FIRST", "AFTER")): 7256 position = self._prev.text 7257 column_position = self.expression( 7258 exp.ColumnPosition, this=self._parse_column(), position=position 7259 ) 7260 expression.set("position", column_position) 7261 7262 return expression 7263 7264 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7265 drop = self._match(TokenType.DROP) and self._parse_drop() 7266 if drop and not isinstance(drop, exp.Command): 7267 drop.set("kind", drop.args.get("kind", "COLUMN")) 7268 return drop 7269 7270 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7271 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7272 return self.expression( 7273 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7274 ) 7275 7276 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7277 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7278 self._match_text_seq("ADD") 7279 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7280 return self.expression( 7281 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7282 ) 7283 7284 column_def = self._parse_add_column() 7285 if isinstance(column_def, exp.ColumnDef): 7286 return column_def 7287 7288 exists = self._parse_exists(not_=True) 7289 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7290 return self.expression( 7291 exp.AddPartition, exists=exists, this=self._parse_field(any_token=True) 7292 ) 7293 7294 return None 7295 7296 if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( 7297 "COLUMNS" 7298 ): 7299 schema = self._parse_schema() 7300 7301 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7302 7303 return self._parse_csv(_parse_add_alteration) 7304 7305 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7306 if self._match_texts(self.ALTER_ALTER_PARSERS): 7307 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7308 7309 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7310 # keyword after ALTER we default to parsing this statement 7311 self._match(TokenType.COLUMN) 7312 column = self._parse_field(any_token=True) 7313 7314 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7315 return self.expression(exp.AlterColumn, this=column, drop=True) 7316 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7317 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7318 if self._match(TokenType.COMMENT): 7319 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7320 if self._match_text_seq("DROP", "NOT", "NULL"): 7321 return self.expression( 7322 exp.AlterColumn, 7323 this=column, 7324 drop=True, 7325 allow_null=True, 7326 ) 7327 if self._match_text_seq("SET", "NOT", "NULL"): 7328 return self.expression( 7329 exp.AlterColumn, 7330 this=column, 7331 allow_null=False, 7332 ) 7333 7334 if self._match_text_seq("SET", "VISIBLE"): 7335 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7336 if self._match_text_seq("SET", "INVISIBLE"): 7337 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7338 7339 self._match_text_seq("SET", "DATA") 7340 self._match_text_seq("TYPE") 7341 return self.expression( 7342 exp.AlterColumn, 7343 this=column, 7344 dtype=self._parse_types(), 7345 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7346 using=self._match(TokenType.USING) and self._parse_assignment(), 7347 ) 7348 7349 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7350 if self._match_texts(("ALL", "EVEN", "AUTO")): 7351 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7352 7353 self._match_text_seq("KEY", "DISTKEY") 7354 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7355 7356 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7357 if compound: 7358 self._match_text_seq("SORTKEY") 7359 7360 if self._match(TokenType.L_PAREN, advance=False): 7361 return self.expression( 7362 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7363 ) 7364 7365 self._match_texts(("AUTO", "NONE")) 7366 return self.expression( 7367 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7368 ) 7369 7370 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7371 index = self._index - 1 7372 7373 partition_exists = self._parse_exists() 7374 if self._match(TokenType.PARTITION, advance=False): 7375 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7376 7377 self._retreat(index) 7378 return self._parse_csv(self._parse_drop_column) 7379 7380 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7381 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7382 exists = self._parse_exists() 7383 old_column = self._parse_column() 7384 to = self._match_text_seq("TO") 7385 new_column = self._parse_column() 7386 7387 if old_column is None or to is None or new_column is None: 7388 return None 7389 7390 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7391 7392 self._match_text_seq("TO") 7393 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7394 7395 def _parse_alter_table_set(self) -> exp.AlterSet: 7396 alter_set = self.expression(exp.AlterSet) 7397 7398 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7399 "TABLE", "PROPERTIES" 7400 ): 7401 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7402 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7403 alter_set.set("expressions", [self._parse_assignment()]) 7404 elif self._match_texts(("LOGGED", "UNLOGGED")): 7405 alter_set.set("option", exp.var(self._prev.text.upper())) 7406 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7407 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7408 elif self._match_text_seq("LOCATION"): 7409 alter_set.set("location", self._parse_field()) 7410 elif self._match_text_seq("ACCESS", "METHOD"): 7411 alter_set.set("access_method", self._parse_field()) 7412 elif self._match_text_seq("TABLESPACE"): 7413 alter_set.set("tablespace", self._parse_field()) 7414 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7415 alter_set.set("file_format", [self._parse_field()]) 7416 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7417 alter_set.set("file_format", self._parse_wrapped_options()) 7418 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7419 alter_set.set("copy_options", self._parse_wrapped_options()) 7420 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7421 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7422 else: 7423 if self._match_text_seq("SERDE"): 7424 alter_set.set("serde", self._parse_field()) 7425 7426 properties = self._parse_wrapped(self._parse_properties, optional=True) 7427 alter_set.set("expressions", [properties]) 7428 7429 return alter_set 7430 7431 def _parse_alter(self) -> exp.Alter | exp.Command: 7432 start = self._prev 7433 7434 alter_token = self._match_set(self.ALTERABLES) and self._prev 7435 if not alter_token: 7436 return self._parse_as_command(start) 7437 7438 exists = self._parse_exists() 7439 only = self._match_text_seq("ONLY") 7440 this = self._parse_table(schema=True) 7441 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7442 7443 if self._next: 7444 self._advance() 7445 7446 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7447 if parser: 7448 actions = ensure_list(parser(self)) 7449 not_valid = self._match_text_seq("NOT", "VALID") 7450 options = self._parse_csv(self._parse_property) 7451 7452 if not self._curr and actions: 7453 return self.expression( 7454 exp.Alter, 7455 this=this, 7456 kind=alter_token.text.upper(), 7457 exists=exists, 7458 actions=actions, 7459 only=only, 7460 options=options, 7461 cluster=cluster, 7462 not_valid=not_valid, 7463 ) 7464 7465 return self._parse_as_command(start) 7466 7467 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7468 start = self._prev 7469 # https://duckdb.org/docs/sql/statements/analyze 7470 if not self._curr: 7471 return self.expression(exp.Analyze) 7472 7473 options = [] 7474 while self._match_texts(self.ANALYZE_STYLES): 7475 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7476 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7477 else: 7478 options.append(self._prev.text.upper()) 7479 7480 this: t.Optional[exp.Expression] = None 7481 inner_expression: t.Optional[exp.Expression] = None 7482 7483 kind = self._curr and self._curr.text.upper() 7484 7485 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7486 this = self._parse_table_parts() 7487 elif self._match_text_seq("TABLES"): 7488 if self._match_set((TokenType.FROM, TokenType.IN)): 7489 kind = f"{kind} {self._prev.text.upper()}" 7490 this = self._parse_table(schema=True, is_db_reference=True) 7491 elif self._match_text_seq("DATABASE"): 7492 this = self._parse_table(schema=True, is_db_reference=True) 7493 elif self._match_text_seq("CLUSTER"): 7494 this = self._parse_table() 7495 # Try matching inner expr keywords before fallback to parse table. 7496 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7497 kind = None 7498 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7499 else: 7500 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7501 kind = None 7502 this = self._parse_table_parts() 7503 7504 partition = self._try_parse(self._parse_partition) 7505 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7506 return self._parse_as_command(start) 7507 7508 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7509 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7510 "WITH", "ASYNC", "MODE" 7511 ): 7512 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7513 else: 7514 mode = None 7515 7516 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7517 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7518 7519 properties = self._parse_properties() 7520 return self.expression( 7521 exp.Analyze, 7522 kind=kind, 7523 this=this, 7524 mode=mode, 7525 partition=partition, 7526 properties=properties, 7527 expression=inner_expression, 7528 options=options, 7529 ) 7530 7531 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7532 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7533 this = None 7534 kind = self._prev.text.upper() 7535 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7536 expressions = [] 7537 7538 if not self._match_text_seq("STATISTICS"): 7539 self.raise_error("Expecting token STATISTICS") 7540 7541 if self._match_text_seq("NOSCAN"): 7542 this = "NOSCAN" 7543 elif self._match(TokenType.FOR): 7544 if self._match_text_seq("ALL", "COLUMNS"): 7545 this = "FOR ALL COLUMNS" 7546 if self._match_texts("COLUMNS"): 7547 this = "FOR COLUMNS" 7548 expressions = self._parse_csv(self._parse_column_reference) 7549 elif self._match_text_seq("SAMPLE"): 7550 sample = self._parse_number() 7551 expressions = [ 7552 self.expression( 7553 exp.AnalyzeSample, 7554 sample=sample, 7555 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7556 ) 7557 ] 7558 7559 return self.expression( 7560 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7561 ) 7562 7563 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7564 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7565 kind = None 7566 this = None 7567 expression: t.Optional[exp.Expression] = None 7568 if self._match_text_seq("REF", "UPDATE"): 7569 kind = "REF" 7570 this = "UPDATE" 7571 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7572 this = "UPDATE SET DANGLING TO NULL" 7573 elif self._match_text_seq("STRUCTURE"): 7574 kind = "STRUCTURE" 7575 if self._match_text_seq("CASCADE", "FAST"): 7576 this = "CASCADE FAST" 7577 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7578 ("ONLINE", "OFFLINE") 7579 ): 7580 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7581 expression = self._parse_into() 7582 7583 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7584 7585 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7586 this = self._prev.text.upper() 7587 if self._match_text_seq("COLUMNS"): 7588 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7589 return None 7590 7591 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7592 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7593 if self._match_text_seq("STATISTICS"): 7594 return self.expression(exp.AnalyzeDelete, kind=kind) 7595 return None 7596 7597 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7598 if self._match_text_seq("CHAINED", "ROWS"): 7599 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7600 return None 7601 7602 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7603 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7604 this = self._prev.text.upper() 7605 expression: t.Optional[exp.Expression] = None 7606 expressions = [] 7607 update_options = None 7608 7609 if self._match_text_seq("HISTOGRAM", "ON"): 7610 expressions = self._parse_csv(self._parse_column_reference) 7611 with_expressions = [] 7612 while self._match(TokenType.WITH): 7613 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7614 if self._match_texts(("SYNC", "ASYNC")): 7615 if self._match_text_seq("MODE", advance=False): 7616 with_expressions.append(f"{self._prev.text.upper()} MODE") 7617 self._advance() 7618 else: 7619 buckets = self._parse_number() 7620 if self._match_text_seq("BUCKETS"): 7621 with_expressions.append(f"{buckets} BUCKETS") 7622 if with_expressions: 7623 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7624 7625 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7626 TokenType.UPDATE, advance=False 7627 ): 7628 update_options = self._prev.text.upper() 7629 self._advance() 7630 elif self._match_text_seq("USING", "DATA"): 7631 expression = self.expression(exp.UsingData, this=self._parse_string()) 7632 7633 return self.expression( 7634 exp.AnalyzeHistogram, 7635 this=this, 7636 expressions=expressions, 7637 expression=expression, 7638 update_options=update_options, 7639 ) 7640 7641 def _parse_merge(self) -> exp.Merge: 7642 self._match(TokenType.INTO) 7643 target = self._parse_table() 7644 7645 if target and self._match(TokenType.ALIAS, advance=False): 7646 target.set("alias", self._parse_table_alias()) 7647 7648 self._match(TokenType.USING) 7649 using = self._parse_table() 7650 7651 self._match(TokenType.ON) 7652 on = self._parse_assignment() 7653 7654 return self.expression( 7655 exp.Merge, 7656 this=target, 7657 using=using, 7658 on=on, 7659 whens=self._parse_when_matched(), 7660 returning=self._parse_returning(), 7661 ) 7662 7663 def _parse_when_matched(self) -> exp.Whens: 7664 whens = [] 7665 7666 while self._match(TokenType.WHEN): 7667 matched = not self._match(TokenType.NOT) 7668 self._match_text_seq("MATCHED") 7669 source = ( 7670 False 7671 if self._match_text_seq("BY", "TARGET") 7672 else self._match_text_seq("BY", "SOURCE") 7673 ) 7674 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7675 7676 self._match(TokenType.THEN) 7677 7678 if self._match(TokenType.INSERT): 7679 this = self._parse_star() 7680 if this: 7681 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7682 else: 7683 then = self.expression( 7684 exp.Insert, 7685 this=exp.var("ROW") 7686 if self._match_text_seq("ROW") 7687 else self._parse_value(values=False), 7688 expression=self._match_text_seq("VALUES") and self._parse_value(), 7689 ) 7690 elif self._match(TokenType.UPDATE): 7691 expressions = self._parse_star() 7692 if expressions: 7693 then = self.expression(exp.Update, expressions=expressions) 7694 else: 7695 then = self.expression( 7696 exp.Update, 7697 expressions=self._match(TokenType.SET) 7698 and self._parse_csv(self._parse_equality), 7699 ) 7700 elif self._match(TokenType.DELETE): 7701 then = self.expression(exp.Var, this=self._prev.text) 7702 else: 7703 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7704 7705 whens.append( 7706 self.expression( 7707 exp.When, 7708 matched=matched, 7709 source=source, 7710 condition=condition, 7711 then=then, 7712 ) 7713 ) 7714 return self.expression(exp.Whens, expressions=whens) 7715 7716 def _parse_show(self) -> t.Optional[exp.Expression]: 7717 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7718 if parser: 7719 return parser(self) 7720 return self._parse_as_command(self._prev) 7721 7722 def _parse_set_item_assignment( 7723 self, kind: t.Optional[str] = None 7724 ) -> t.Optional[exp.Expression]: 7725 index = self._index 7726 7727 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7728 return self._parse_set_transaction(global_=kind == "GLOBAL") 7729 7730 left = self._parse_primary() or self._parse_column() 7731 assignment_delimiter = self._match_texts(("=", "TO")) 7732 7733 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7734 self._retreat(index) 7735 return None 7736 7737 right = self._parse_statement() or self._parse_id_var() 7738 if isinstance(right, (exp.Column, exp.Identifier)): 7739 right = exp.var(right.name) 7740 7741 this = self.expression(exp.EQ, this=left, expression=right) 7742 return self.expression(exp.SetItem, this=this, kind=kind) 7743 7744 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7745 self._match_text_seq("TRANSACTION") 7746 characteristics = self._parse_csv( 7747 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7748 ) 7749 return self.expression( 7750 exp.SetItem, 7751 expressions=characteristics, 7752 kind="TRANSACTION", 7753 **{"global": global_}, # type: ignore 7754 ) 7755 7756 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7757 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7758 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7759 7760 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7761 index = self._index 7762 set_ = self.expression( 7763 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7764 ) 7765 7766 if self._curr: 7767 self._retreat(index) 7768 return self._parse_as_command(self._prev) 7769 7770 return set_ 7771 7772 def _parse_var_from_options( 7773 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7774 ) -> t.Optional[exp.Var]: 7775 start = self._curr 7776 if not start: 7777 return None 7778 7779 option = start.text.upper() 7780 continuations = options.get(option) 7781 7782 index = self._index 7783 self._advance() 7784 for keywords in continuations or []: 7785 if isinstance(keywords, str): 7786 keywords = (keywords,) 7787 7788 if self._match_text_seq(*keywords): 7789 option = f"{option} {' '.join(keywords)}" 7790 break 7791 else: 7792 if continuations or continuations is None: 7793 if raise_unmatched: 7794 self.raise_error(f"Unknown option {option}") 7795 7796 self._retreat(index) 7797 return None 7798 7799 return exp.var(option) 7800 7801 def _parse_as_command(self, start: Token) -> exp.Command: 7802 while self._curr: 7803 self._advance() 7804 text = self._find_sql(start, self._prev) 7805 size = len(start.text) 7806 self._warn_unsupported() 7807 return exp.Command(this=text[:size], expression=text[size:]) 7808 7809 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7810 settings = [] 7811 7812 self._match_l_paren() 7813 kind = self._parse_id_var() 7814 7815 if self._match(TokenType.L_PAREN): 7816 while True: 7817 key = self._parse_id_var() 7818 value = self._parse_primary() 7819 if not key and value is None: 7820 break 7821 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7822 self._match(TokenType.R_PAREN) 7823 7824 self._match_r_paren() 7825 7826 return self.expression( 7827 exp.DictProperty, 7828 this=this, 7829 kind=kind.this if kind else None, 7830 settings=settings, 7831 ) 7832 7833 def _parse_dict_range(self, this: str) -> exp.DictRange: 7834 self._match_l_paren() 7835 has_min = self._match_text_seq("MIN") 7836 if has_min: 7837 min = self._parse_var() or self._parse_primary() 7838 self._match_text_seq("MAX") 7839 max = self._parse_var() or self._parse_primary() 7840 else: 7841 max = self._parse_var() or self._parse_primary() 7842 min = exp.Literal.number(0) 7843 self._match_r_paren() 7844 return self.expression(exp.DictRange, this=this, min=min, max=max) 7845 7846 def _parse_comprehension( 7847 self, this: t.Optional[exp.Expression] 7848 ) -> t.Optional[exp.Comprehension]: 7849 index = self._index 7850 expression = self._parse_column() 7851 if not self._match(TokenType.IN): 7852 self._retreat(index - 1) 7853 return None 7854 iterator = self._parse_column() 7855 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7856 return self.expression( 7857 exp.Comprehension, 7858 this=this, 7859 expression=expression, 7860 iterator=iterator, 7861 condition=condition, 7862 ) 7863 7864 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7865 if self._match(TokenType.HEREDOC_STRING): 7866 return self.expression(exp.Heredoc, this=self._prev.text) 7867 7868 if not self._match_text_seq("$"): 7869 return None 7870 7871 tags = ["$"] 7872 tag_text = None 7873 7874 if self._is_connected(): 7875 self._advance() 7876 tags.append(self._prev.text.upper()) 7877 else: 7878 self.raise_error("No closing $ found") 7879 7880 if tags[-1] != "$": 7881 if self._is_connected() and self._match_text_seq("$"): 7882 tag_text = tags[-1] 7883 tags.append("$") 7884 else: 7885 self.raise_error("No closing $ found") 7886 7887 heredoc_start = self._curr 7888 7889 while self._curr: 7890 if self._match_text_seq(*tags, advance=False): 7891 this = self._find_sql(heredoc_start, self._prev) 7892 self._advance(len(tags)) 7893 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7894 7895 self._advance() 7896 7897 self.raise_error(f"No closing {''.join(tags)} found") 7898 return None 7899 7900 def _find_parser( 7901 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7902 ) -> t.Optional[t.Callable]: 7903 if not self._curr: 7904 return None 7905 7906 index = self._index 7907 this = [] 7908 while True: 7909 # The current token might be multiple words 7910 curr = self._curr.text.upper() 7911 key = curr.split(" ") 7912 this.append(curr) 7913 7914 self._advance() 7915 result, trie = in_trie(trie, key) 7916 if result == TrieResult.FAILED: 7917 break 7918 7919 if result == TrieResult.EXISTS: 7920 subparser = parsers[" ".join(this)] 7921 return subparser 7922 7923 self._retreat(index) 7924 return None 7925 7926 def _match(self, token_type, advance=True, expression=None): 7927 if not self._curr: 7928 return None 7929 7930 if self._curr.token_type == token_type: 7931 if advance: 7932 self._advance() 7933 self._add_comments(expression) 7934 return True 7935 7936 return None 7937 7938 def _match_set(self, types, advance=True): 7939 if not self._curr: 7940 return None 7941 7942 if self._curr.token_type in types: 7943 if advance: 7944 self._advance() 7945 return True 7946 7947 return None 7948 7949 def _match_pair(self, token_type_a, token_type_b, advance=True): 7950 if not self._curr or not self._next: 7951 return None 7952 7953 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7954 if advance: 7955 self._advance(2) 7956 return True 7957 7958 return None 7959 7960 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7961 if not self._match(TokenType.L_PAREN, expression=expression): 7962 self.raise_error("Expecting (") 7963 7964 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7965 if not self._match(TokenType.R_PAREN, expression=expression): 7966 self.raise_error("Expecting )") 7967 7968 def _match_texts(self, texts, advance=True): 7969 if ( 7970 self._curr 7971 and self._curr.token_type != TokenType.STRING 7972 and self._curr.text.upper() in texts 7973 ): 7974 if advance: 7975 self._advance() 7976 return True 7977 return None 7978 7979 def _match_text_seq(self, *texts, advance=True): 7980 index = self._index 7981 for text in texts: 7982 if ( 7983 self._curr 7984 and self._curr.token_type != TokenType.STRING 7985 and self._curr.text.upper() == text 7986 ): 7987 self._advance() 7988 else: 7989 self._retreat(index) 7990 return None 7991 7992 if not advance: 7993 self._retreat(index) 7994 7995 return True 7996 7997 def _replace_lambda( 7998 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7999 ) -> t.Optional[exp.Expression]: 8000 if not node: 8001 return node 8002 8003 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8004 8005 for column in node.find_all(exp.Column): 8006 typ = lambda_types.get(column.parts[0].name) 8007 if typ is not None: 8008 dot_or_id = column.to_dot() if column.table else column.this 8009 8010 if typ: 8011 dot_or_id = self.expression( 8012 exp.Cast, 8013 this=dot_or_id, 8014 to=typ, 8015 ) 8016 8017 parent = column.parent 8018 8019 while isinstance(parent, exp.Dot): 8020 if not isinstance(parent.parent, exp.Dot): 8021 parent.replace(dot_or_id) 8022 break 8023 parent = parent.parent 8024 else: 8025 if column is node: 8026 node = dot_or_id 8027 else: 8028 column.replace(dot_or_id) 8029 return node 8030 8031 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8032 start = self._prev 8033 8034 # Not to be confused with TRUNCATE(number, decimals) function call 8035 if self._match(TokenType.L_PAREN): 8036 self._retreat(self._index - 2) 8037 return self._parse_function() 8038 8039 # Clickhouse supports TRUNCATE DATABASE as well 8040 is_database = self._match(TokenType.DATABASE) 8041 8042 self._match(TokenType.TABLE) 8043 8044 exists = self._parse_exists(not_=False) 8045 8046 expressions = self._parse_csv( 8047 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8048 ) 8049 8050 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8051 8052 if self._match_text_seq("RESTART", "IDENTITY"): 8053 identity = "RESTART" 8054 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8055 identity = "CONTINUE" 8056 else: 8057 identity = None 8058 8059 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8060 option = self._prev.text 8061 else: 8062 option = None 8063 8064 partition = self._parse_partition() 8065 8066 # Fallback case 8067 if self._curr: 8068 return self._parse_as_command(start) 8069 8070 return self.expression( 8071 exp.TruncateTable, 8072 expressions=expressions, 8073 is_database=is_database, 8074 exists=exists, 8075 cluster=cluster, 8076 identity=identity, 8077 option=option, 8078 partition=partition, 8079 ) 8080 8081 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8082 this = self._parse_ordered(self._parse_opclass) 8083 8084 if not self._match(TokenType.WITH): 8085 return this 8086 8087 op = self._parse_var(any_token=True) 8088 8089 return self.expression(exp.WithOperator, this=this, op=op) 8090 8091 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8092 self._match(TokenType.EQ) 8093 self._match(TokenType.L_PAREN) 8094 8095 opts: t.List[t.Optional[exp.Expression]] = [] 8096 option: exp.Expression | None 8097 while self._curr and not self._match(TokenType.R_PAREN): 8098 if self._match_text_seq("FORMAT_NAME", "="): 8099 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8100 option = self._parse_format_name() 8101 else: 8102 option = self._parse_property() 8103 8104 if option is None: 8105 self.raise_error("Unable to parse option") 8106 break 8107 8108 opts.append(option) 8109 8110 return opts 8111 8112 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8113 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8114 8115 options = [] 8116 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8117 option = self._parse_var(any_token=True) 8118 prev = self._prev.text.upper() 8119 8120 # Different dialects might separate options and values by white space, "=" and "AS" 8121 self._match(TokenType.EQ) 8122 self._match(TokenType.ALIAS) 8123 8124 param = self.expression(exp.CopyParameter, this=option) 8125 8126 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8127 TokenType.L_PAREN, advance=False 8128 ): 8129 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8130 param.set("expressions", self._parse_wrapped_options()) 8131 elif prev == "FILE_FORMAT": 8132 # T-SQL's external file format case 8133 param.set("expression", self._parse_field()) 8134 else: 8135 param.set("expression", self._parse_unquoted_field()) 8136 8137 options.append(param) 8138 self._match(sep) 8139 8140 return options 8141 8142 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8143 expr = self.expression(exp.Credentials) 8144 8145 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8146 expr.set("storage", self._parse_field()) 8147 if self._match_text_seq("CREDENTIALS"): 8148 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8149 creds = ( 8150 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8151 ) 8152 expr.set("credentials", creds) 8153 if self._match_text_seq("ENCRYPTION"): 8154 expr.set("encryption", self._parse_wrapped_options()) 8155 if self._match_text_seq("IAM_ROLE"): 8156 expr.set("iam_role", self._parse_field()) 8157 if self._match_text_seq("REGION"): 8158 expr.set("region", self._parse_field()) 8159 8160 return expr 8161 8162 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8163 return self._parse_field() 8164 8165 def _parse_copy(self) -> exp.Copy | exp.Command: 8166 start = self._prev 8167 8168 self._match(TokenType.INTO) 8169 8170 this = ( 8171 self._parse_select(nested=True, parse_subquery_alias=False) 8172 if self._match(TokenType.L_PAREN, advance=False) 8173 else self._parse_table(schema=True) 8174 ) 8175 8176 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8177 8178 files = self._parse_csv(self._parse_file_location) 8179 credentials = self._parse_credentials() 8180 8181 self._match_text_seq("WITH") 8182 8183 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8184 8185 # Fallback case 8186 if self._curr: 8187 return self._parse_as_command(start) 8188 8189 return self.expression( 8190 exp.Copy, 8191 this=this, 8192 kind=kind, 8193 credentials=credentials, 8194 files=files, 8195 params=params, 8196 ) 8197 8198 def _parse_normalize(self) -> exp.Normalize: 8199 return self.expression( 8200 exp.Normalize, 8201 this=self._parse_bitwise(), 8202 form=self._match(TokenType.COMMA) and self._parse_var(), 8203 ) 8204 8205 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8206 args = self._parse_csv(lambda: self._parse_lambda()) 8207 8208 this = seq_get(args, 0) 8209 decimals = seq_get(args, 1) 8210 8211 return expr_type( 8212 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8213 ) 8214 8215 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8216 star_token = self._prev 8217 8218 if self._match_text_seq("COLUMNS", "(", advance=False): 8219 this = self._parse_function() 8220 if isinstance(this, exp.Columns): 8221 this.set("unpack", True) 8222 return this 8223 8224 return self.expression( 8225 exp.Star, 8226 **{ # type: ignore 8227 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8228 "replace": self._parse_star_op("REPLACE"), 8229 "rename": self._parse_star_op("RENAME"), 8230 }, 8231 ).update_positions(star_token) 8232 8233 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8234 privilege_parts = [] 8235 8236 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8237 # (end of privilege list) or L_PAREN (start of column list) are met 8238 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8239 privilege_parts.append(self._curr.text.upper()) 8240 self._advance() 8241 8242 this = exp.var(" ".join(privilege_parts)) 8243 expressions = ( 8244 self._parse_wrapped_csv(self._parse_column) 8245 if self._match(TokenType.L_PAREN, advance=False) 8246 else None 8247 ) 8248 8249 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8250 8251 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8252 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8253 principal = self._parse_id_var() 8254 8255 if not principal: 8256 return None 8257 8258 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8259 8260 def _parse_grant(self) -> exp.Grant | exp.Command: 8261 start = self._prev 8262 8263 privileges = self._parse_csv(self._parse_grant_privilege) 8264 8265 self._match(TokenType.ON) 8266 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8267 8268 # Attempt to parse the securable e.g. MySQL allows names 8269 # such as "foo.*", "*.*" which are not easily parseable yet 8270 securable = self._try_parse(self._parse_table_parts) 8271 8272 if not securable or not self._match_text_seq("TO"): 8273 return self._parse_as_command(start) 8274 8275 principals = self._parse_csv(self._parse_grant_principal) 8276 8277 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8278 8279 if self._curr: 8280 return self._parse_as_command(start) 8281 8282 return self.expression( 8283 exp.Grant, 8284 privileges=privileges, 8285 kind=kind, 8286 securable=securable, 8287 principals=principals, 8288 grant_option=grant_option, 8289 ) 8290 8291 def _parse_overlay(self) -> exp.Overlay: 8292 return self.expression( 8293 exp.Overlay, 8294 **{ # type: ignore 8295 "this": self._parse_bitwise(), 8296 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8297 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8298 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8299 }, 8300 ) 8301 8302 def _parse_format_name(self) -> exp.Property: 8303 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8304 # for FILE_FORMAT = <format_name> 8305 return self.expression( 8306 exp.Property, 8307 this=exp.var("FORMAT_NAME"), 8308 value=self._parse_string() or self._parse_table_parts(), 8309 ) 8310 8311 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8312 args: t.List[exp.Expression] = [] 8313 8314 if self._match(TokenType.DISTINCT): 8315 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8316 self._match(TokenType.COMMA) 8317 8318 args.extend(self._parse_csv(self._parse_assignment)) 8319 8320 return self.expression( 8321 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8322 ) 8323 8324 def _identifier_expression( 8325 self, token: t.Optional[Token] = None, **kwargs: t.Any 8326 ) -> exp.Identifier: 8327 token = token or self._prev 8328 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8329 expression.update_positions(token) 8330 return expression 8331 8332 def _build_pipe_cte(self, query: exp.Query, expressions: t.List[exp.Expression]) -> exp.Select: 8333 if not query.selects: 8334 query = query.select("*", copy=False) 8335 8336 self._pipe_cte_counter += 1 8337 new_cte = f"__tmp{self._pipe_cte_counter}" 8338 8339 with_ = query.args.get("with") 8340 ctes = with_.pop() if with_ else None 8341 8342 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8343 if ctes: 8344 new_select.set("with", ctes) 8345 8346 return new_select.with_(new_cte, as_=query, copy=False) 8347 8348 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8349 select = self._parse_select() 8350 if not select: 8351 return query 8352 8353 if not query.selects: 8354 return self._build_pipe_cte(query.select(*select.expressions), [exp.Star()]) 8355 8356 return self._build_pipe_cte(query, select.expressions) 8357 8358 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8359 limit = self._parse_limit() 8360 offset = self._parse_offset() 8361 if limit: 8362 curr_limit = query.args.get("limit", limit) 8363 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8364 query.limit(limit, copy=False) 8365 if offset: 8366 curr_offset = query.args.get("offset") 8367 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8368 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8369 8370 return query 8371 8372 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8373 this = self._parse_assignment() 8374 if self._match_text_seq("GROUP", "AND", advance=False): 8375 return this 8376 8377 this = self._parse_alias(this) 8378 8379 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8380 return self._parse_ordered(lambda: this) 8381 8382 return this 8383 8384 def _parse_pipe_syntax_aggregate_group_order_by( 8385 self, query: exp.Select, group_by_exists: bool = True 8386 ) -> exp.Select: 8387 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8388 aggregates_or_groups, orders = [], [] 8389 for element in expr: 8390 if isinstance(element, exp.Ordered): 8391 this = element.this 8392 if isinstance(this, exp.Alias): 8393 element.set("this", this.args["alias"]) 8394 orders.append(element) 8395 else: 8396 this = element 8397 aggregates_or_groups.append(this) 8398 8399 if group_by_exists: 8400 query = query.select(*aggregates_or_groups, copy=False).group_by( 8401 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8402 copy=False, 8403 ) 8404 else: 8405 query = query.select(*aggregates_or_groups, copy=False) 8406 8407 if orders: 8408 return query.order_by(*orders, append=False, copy=False) 8409 8410 return query 8411 8412 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8413 self._match_text_seq("AGGREGATE") 8414 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8415 8416 if self._match(TokenType.GROUP_BY) or ( 8417 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8418 ): 8419 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8420 8421 return self._build_pipe_cte(query, [exp.Star()]) 8422 8423 def _parse_pipe_syntax_set_operator( 8424 self, query: t.Optional[exp.Query] 8425 ) -> t.Optional[exp.Select]: 8426 first_setop = self.parse_set_operation(this=query) 8427 8428 if not first_setop or not query: 8429 return None 8430 8431 first_setop.this.pop() 8432 distinct = first_setop.args.pop("distinct") 8433 setops = [first_setop.expression.pop(), *self._parse_expressions()] 8434 8435 query = self._build_pipe_cte(query, [exp.Star()]) 8436 with_ = query.args.get("with") 8437 ctes = with_.pop() if with_ else None 8438 8439 if isinstance(first_setop, exp.Union): 8440 query = query.union(*setops, distinct=distinct, copy=False, **first_setop.args) 8441 elif isinstance(first_setop, exp.Except): 8442 query = query.except_(*setops, distinct=distinct, copy=False, **first_setop.args) 8443 else: 8444 query = query.intersect(*setops, distinct=distinct, copy=False, **first_setop.args) 8445 8446 query.set("with", ctes) 8447 8448 return self._build_pipe_cte(query, [exp.Star()]) 8449 8450 def _parse_pipe_syntax_join(self, query: exp.Select) -> t.Optional[exp.Select]: 8451 join = self._parse_join() 8452 if not join: 8453 return None 8454 8455 return query.join(join, copy=False) 8456 8457 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8458 pivots = self._parse_pivots() 8459 if not pivots: 8460 return query 8461 8462 from_ = query.args.get("from") 8463 if from_: 8464 from_.this.set("pivots", pivots) 8465 8466 return self._build_pipe_cte(query, [exp.Star()]) 8467 8468 def _parse_pipe_syntax_query(self, query: exp.Select) -> t.Optional[exp.Select]: 8469 while self._match(TokenType.PIPE_GT): 8470 start = self._curr 8471 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8472 if not parser: 8473 parsed_query = self._parse_pipe_syntax_set_operator( 8474 query 8475 ) or self._parse_pipe_syntax_join(query) 8476 if not parsed_query: 8477 self._retreat(start) 8478 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8479 break 8480 query = parsed_query 8481 else: 8482 query = parser(self, query) 8483 8484 if query and not query.selects: 8485 return query.select("*", copy=False) 8486 8487 return query
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1535 def __init__( 1536 self, 1537 error_level: t.Optional[ErrorLevel] = None, 1538 error_message_context: int = 100, 1539 max_errors: int = 3, 1540 dialect: DialectType = None, 1541 ): 1542 from sqlglot.dialects import Dialect 1543 1544 self.error_level = error_level or ErrorLevel.IMMEDIATE 1545 self.error_message_context = error_message_context 1546 self.max_errors = max_errors 1547 self.dialect = Dialect.get_or_raise(dialect) 1548 self.reset()
1561 def parse( 1562 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1563 ) -> t.List[t.Optional[exp.Expression]]: 1564 """ 1565 Parses a list of tokens and returns a list of syntax trees, one tree 1566 per parsed SQL statement. 1567 1568 Args: 1569 raw_tokens: The list of tokens. 1570 sql: The original SQL string, used to produce helpful debug messages. 1571 1572 Returns: 1573 The list of the produced syntax trees. 1574 """ 1575 return self._parse( 1576 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1577 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1579 def parse_into( 1580 self, 1581 expression_types: exp.IntoType, 1582 raw_tokens: t.List[Token], 1583 sql: t.Optional[str] = None, 1584 ) -> t.List[t.Optional[exp.Expression]]: 1585 """ 1586 Parses a list of tokens into a given Expression type. If a collection of Expression 1587 types is given instead, this method will try to parse the token list into each one 1588 of them, stopping at the first for which the parsing succeeds. 1589 1590 Args: 1591 expression_types: The expression type(s) to try and parse the token list into. 1592 raw_tokens: The list of tokens. 1593 sql: The original SQL string, used to produce helpful debug messages. 1594 1595 Returns: 1596 The target Expression. 1597 """ 1598 errors = [] 1599 for expression_type in ensure_list(expression_types): 1600 parser = self.EXPRESSION_PARSERS.get(expression_type) 1601 if not parser: 1602 raise TypeError(f"No parser registered for {expression_type}") 1603 1604 try: 1605 return self._parse(parser, raw_tokens, sql) 1606 except ParseError as e: 1607 e.errors[0]["into_expression"] = expression_type 1608 errors.append(e) 1609 1610 raise ParseError( 1611 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1612 errors=merge_errors(errors), 1613 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1653 def check_errors(self) -> None: 1654 """Logs or raises any found errors, depending on the chosen error level setting.""" 1655 if self.error_level == ErrorLevel.WARN: 1656 for error in self.errors: 1657 logger.error(str(error)) 1658 elif self.error_level == ErrorLevel.RAISE and self.errors: 1659 raise ParseError( 1660 concat_messages(self.errors, self.max_errors), 1661 errors=merge_errors(self.errors), 1662 )
Logs or raises any found errors, depending on the chosen error level setting.
1664 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1665 """ 1666 Appends an error in the list of recorded errors or raises it, depending on the chosen 1667 error level setting. 1668 """ 1669 token = token or self._curr or self._prev or Token.string("") 1670 start = token.start 1671 end = token.end + 1 1672 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1673 highlight = self.sql[start:end] 1674 end_context = self.sql[end : end + self.error_message_context] 1675 1676 error = ParseError.new( 1677 f"{message}. Line {token.line}, Col: {token.col}.\n" 1678 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1679 description=message, 1680 line=token.line, 1681 col=token.col, 1682 start_context=start_context, 1683 highlight=highlight, 1684 end_context=end_context, 1685 ) 1686 1687 if self.error_level == ErrorLevel.IMMEDIATE: 1688 raise error 1689 1690 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1692 def expression( 1693 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1694 ) -> E: 1695 """ 1696 Creates a new, validated Expression. 1697 1698 Args: 1699 exp_class: The expression class to instantiate. 1700 comments: An optional list of comments to attach to the expression. 1701 kwargs: The arguments to set for the expression along with their respective values. 1702 1703 Returns: 1704 The target expression. 1705 """ 1706 instance = exp_class(**kwargs) 1707 instance.add_comments(comments) if comments else self._add_comments(instance) 1708 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1715 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1716 """ 1717 Validates an Expression, making sure that all its mandatory arguments are set. 1718 1719 Args: 1720 expression: The expression to validate. 1721 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1722 1723 Returns: 1724 The validated expression. 1725 """ 1726 if self.error_level != ErrorLevel.IGNORE: 1727 for error_message in expression.error_messages(args): 1728 self.raise_error(error_message) 1729 1730 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
4707 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4708 start = self._index 4709 _, side_token, kind_token = self._parse_join_parts() 4710 4711 side = side_token.text if side_token else None 4712 kind = kind_token.text if kind_token else None 4713 4714 if not self._match_set(self.SET_OPERATIONS): 4715 self._retreat(start) 4716 return None 4717 4718 token_type = self._prev.token_type 4719 4720 if token_type == TokenType.UNION: 4721 operation: t.Type[exp.SetOperation] = exp.Union 4722 elif token_type == TokenType.EXCEPT: 4723 operation = exp.Except 4724 else: 4725 operation = exp.Intersect 4726 4727 comments = self._prev.comments 4728 4729 if self._match(TokenType.DISTINCT): 4730 distinct: t.Optional[bool] = True 4731 elif self._match(TokenType.ALL): 4732 distinct = False 4733 else: 4734 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4735 if distinct is None: 4736 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4737 4738 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4739 "STRICT", "CORRESPONDING" 4740 ) 4741 if self._match_text_seq("CORRESPONDING"): 4742 by_name = True 4743 if not side and not kind: 4744 kind = "INNER" 4745 4746 on_column_list = None 4747 if by_name and self._match_texts(("ON", "BY")): 4748 on_column_list = self._parse_wrapped_csv(self._parse_column) 4749 4750 expression = self._parse_select(nested=True, parse_set_operation=False) 4751 4752 return self.expression( 4753 operation, 4754 comments=comments, 4755 this=this, 4756 distinct=distinct, 4757 by_name=by_name, 4758 expression=expression, 4759 side=side, 4760 kind=kind, 4761 on=on_column_list, 4762 )