Initial commit

2025-08-14 00:25:46 +02:00 · 2024-12-23 19:27:44 +06:00
commit e81df4c87e
4952 changed files with 1705479 additions and 0 deletions
--- a/venv/Lib/site-packages/blib2to3/pgen2/grammar.py
+++ b/venv/Lib/site-packages/blib2to3/pgen2/grammar.py
@@ -0,0 +1,228 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""This module defines the data structures used to represent a grammar.
+
+These are a bit arcane because they are derived from the data
+structures used by Python's 'pgen' parser generator.
+
+There's also a table here mapping operators to their names in the
+token module; the Python tokenize module reports all operators as the
+fallback token code OP, but the parser needs the actual token code.
+
+"""
+
+# Python imports
+import os
+import pickle
+import tempfile
+from typing import Any, Optional, TypeVar, Union
+
+# Local imports
+from . import token
+
+# Type variable bound to Grammar, used so that Grammar.copy() is typed as
+# returning the same (sub)class it was called on.
+_P = TypeVar("_P", bound="Grammar")
+# One entry of Grammar.labels: a (token or symbol number, keyword or None) pair.
+Label = tuple[int, Optional[str]]
+# One DFA: a list of states, each state being a list of (label, state) arcs.
+DFA = list[list[tuple[int, int]]]
+# Value type of Grammar.dfas: a DFA paired with its "first" set, which is
+# stored as a dict.
+DFAS = tuple[DFA, dict[int, int]]
+# File name type accepted by Grammar.dump() and Grammar.load().
+Path = Union[str, "os.PathLike[str]"]
+
+
+class Grammar:
+    """Pgen parsing tables conversion class.
+
+    Once initialized, this class supplies the grammar tables for the
+    parsing engine implemented by parse.py.  The parsing engine
+    accesses the instance variables directly.  The class here does not
+    provide initialization of the tables; several subclasses exist to
+    do this (see the conv and pgen modules).
+
+    The load() method reads the tables from a pickle file, which is
+    much faster than the other ways offered by subclasses.  The pickle
+    file is written by calling dump() (after loading the grammar
+    tables using a subclass).  The report() method prints a readable
+    representation of the tables to stdout, for debugging.
+
+    The instance variables are as follows:
+
+    symbol2number -- a dict mapping symbol names to numbers.  Symbol
+                     numbers are always 256 or higher, to distinguish
+                     them from token numbers, which are between 0 and
+                     255 (inclusive).
+
+    number2symbol -- a dict mapping numbers to symbol names;
+                     these two are each other's inverse.
+
+    states        -- a list of DFAs, where each DFA is a list of
+                     states, each state is a list of arcs, and each
+                     arc is a (i, j) pair where i is a label and j is
+                     a state number.  The DFA number is the index into
+                     this list.  (This name is slightly confusing.)
+                     Final states are represented by a special arc of
+                     the form (0, j) where j is its own state number.
+
+    dfas          -- a dict mapping symbol numbers to (DFA, first)
+                     pairs, where DFA is an item from the states list
+                     above, and first is a set of tokens that can
+                     begin this grammar rule (represented by a dict
+                     whose values are always 1).
+
+    labels        -- a list of (x, y) pairs where x is either a token
+                     number or a symbol number, and y is either None
+                     or a string; the strings are keywords.  The label
+                     number is the index in this list; label numbers
+                     are used to mark state transitions (arcs) in the
+                     DFAs.
+
+    start         -- the number of the grammar's start symbol.
+
+    keywords      -- a dict mapping keyword strings to arc labels.
+
+    tokens        -- a dict mapping token numbers to arc labels.
+
+    The following instance variables are initialized by __init__ as
+    well:
+
+    soft_keywords  -- a dict mapping strings to ints (presumably soft
+                      keyword strings to arc labels, like keywords;
+                      confirm against the code that fills it).
+
+    symbol2label   -- a dict mapping strings to ints (presumably
+                      symbol names to label numbers; confirm against
+                      the code that fills it).
+
+    version        -- a pair of ints, (0, 0) until something sets it
+                      (presumably a Python version; confirm against
+                      the code that sets it).
+
+    async_keywords -- a bool, False by default.  Python 3.7+ parses
+                      async as a keyword, not an identifier.
+
+    """
+
+    def __init__(self) -> None:
+        """Create a grammar whose tables are all empty."""
+        self.symbol2number: dict[str, int] = {}
+        self.number2symbol: dict[int, str] = {}
+        self.states: list[DFA] = []
+        self.dfas: dict[int, DFAS] = {}
+        # Label number 0 is always the (0, "EMPTY") entry.
+        self.labels: list[Label] = [(0, "EMPTY")]
+        self.keywords: dict[str, int] = {}
+        self.soft_keywords: dict[str, int] = {}
+        self.tokens: dict[int, int] = {}
+        self.symbol2label: dict[str, int] = {}
+        self.version: tuple[int, int] = (0, 0)
+        self.start = 256
+        # Python 3.7+ parses async as a keyword, not an identifier
+        self.async_keywords = False
+
+    def dump(self, filename: Path) -> None:
+        """Dump the grammar tables to a pickle file.
+
+        The pickle is first written to a temporary file created in the
+        directory of filename and then moved over filename with
+        os.replace(), so the data only shows up under the final name
+        once it has been written completely.
+
+        If writing the pickle or replacing the target fails, the
+        temporary file is removed again (best effort) and the original
+        exception is re-raised.
+        """
+
+        # mypyc generates objects that don't have a __dict__, but they
+        # do have __getstate__ methods that will return an equivalent
+        # dictionary
+        if hasattr(self, "__dict__"):
+            d = self.__dict__
+        else:
+            d = self.__getstate__()  # type: ignore
+
+        # delete=False because the file has to outlive the "with" block
+        # in order to be renamed; that makes cleanup on failure our job.
+        tmp = tempfile.NamedTemporaryFile(
+            dir=os.path.dirname(filename), delete=False
+        )
+        try:
+            with tmp:
+                pickle.dump(d, tmp, pickle.HIGHEST_PROTOCOL)
+            os.replace(tmp.name, filename)
+        except BaseException:
+            # Don't leave a stray temporary file behind.  Failing to
+            # remove it must not hide the error that got us here.
+            try:
+                os.unlink(tmp.name)
+            except OSError:
+                pass
+            raise
+
+    def _update(self, attrs: dict[str, Any]) -> None:
+        """Set every key of attrs as an attribute on this instance."""
+        for k, v in attrs.items():
+            setattr(self, k, v)
+
+    def load(self, filename: Path) -> None:
+        """Load the grammar tables from a pickle file."""
+        # NOTE: unpickling can execute arbitrary code.  Only load files
+        # that come from a trusted location (e.g. written by dump()).
+        with open(filename, "rb") as f:
+            d = pickle.load(f)
+        self._update(d)
+
+    def loads(self, pkl: bytes) -> None:
+        """Load the grammar tables from a pickle bytes object."""
+        # NOTE: unpickling can execute arbitrary code.  Only pass bytes
+        # that come from a trusted source.
+        self._update(pickle.loads(pkl))
+
+    def copy(self: _P) -> _P:
+        """
+        Copy the grammar.
+
+        A new instance of the same class is created and the tables are
+        copied one level deep: the dicts and lists themselves are new
+        objects, but the values stored in them are shared with the
+        original grammar.
+        """
+        new = self.__class__()
+        for dict_attr in (
+            "symbol2number",
+            "number2symbol",
+            "dfas",
+            "keywords",
+            "soft_keywords",
+            "tokens",
+            "symbol2label",
+        ):
+            setattr(new, dict_attr, getattr(self, dict_attr).copy())
+        new.labels = self.labels[:]
+        new.states = self.states[:]
+        new.start = self.start
+        new.version = self.version
+        new.async_keywords = self.async_keywords
+        return new
+
+    def report(self) -> None:
+        """Dump the grammar tables to standard output, for debugging."""
+        from pprint import pprint
+
+        print("s2n")
+        pprint(self.symbol2number)
+        print("n2s")
+        pprint(self.number2symbol)
+        print("states")
+        pprint(self.states)
+        print("dfas")
+        pprint(self.dfas)
+        print("labels")
+        pprint(self.labels)
+        print("start", self.start)
+
+
+# Map from operator to number (since tokenize doesn't do this)
+#
+# Every non-empty line of opmap_raw has the form "<operator> <name>", where
+# <name> is looked up as an attribute of the token module.  Two spellings may
+# share a name (both != and <> map to NOTEQUAL), but every operator must
+# appear only once, and every line must contain exactly two fields.
+
+opmap_raw = """
+( LPAR
+) RPAR
+[ LSQB
+] RSQB
+: COLON
+, COMMA
+; SEMI
++ PLUS
+- MINUS
+* STAR
+/ SLASH
+| VBAR
+& AMPER
+< LESS
+> GREATER
+= EQUAL
+. DOT
+% PERCENT
+` BACKQUOTE
+{ LBRACE
+} RBRACE
+@ AT
+@= ATEQUAL
+== EQEQUAL
+!= NOTEQUAL
+<> NOTEQUAL
+<= LESSEQUAL
+>= GREATEREQUAL
+~ TILDE
+^ CIRCUMFLEX
+<< LEFTSHIFT
+>> RIGHTSHIFT
+** DOUBLESTAR
++= PLUSEQUAL
+-= MINEQUAL
+*= STAREQUAL
+/= SLASHEQUAL
+%= PERCENTEQUAL
+&= AMPEREQUAL
+|= VBAREQUAL
+^= CIRCUMFLEXEQUAL
+<<= LEFTSHIFTEQUAL
+>>= RIGHTSHIFTEQUAL
+**= DOUBLESTAREQUAL
+// DOUBLESLASH
+//= DOUBLESLASHEQUAL
+-> RARROW
+:= COLONEQUAL
+! BANG
+"""
+
+# Build the operator -> token number table from the text above.
+opmap = {}
+for line in opmap_raw.splitlines():
+    # The string starts with a newline, so the first line is empty.
+    if line:
+        op, name = line.split()
+        opmap[op] = getattr(token, name)