mirror of https://gitlab.com/MoonTestUse1/AdministrationItDepartmens.git synced 2025-08-14 00:25:46 +02:00

Initial commit

MoonTestUse1
2024-12-23 19:27:44 +06:00
commit e81df4c87e
4952 changed files with 1705479 additions and 0 deletions


@@ -0,0 +1,261 @@
# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
# NOTE WELL: You should also follow all the steps listed at
# https://devguide.python.org/grammar/
# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
eval_input: testlist NEWLINE* ENDMARKER
typevar: NAME [':' expr] ['=' expr]
paramspec: '**' NAME ['=' expr]
typevartuple: '*' NAME ['=' (expr|star_expr)]
typeparam: typevar | paramspec | typevartuple
typeparams: '[' typeparam (',' typeparam)* [','] ']'
decorator: '@' namedexpr_test NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: ASYNC funcdef
funcdef: 'def' NAME [typeparams] parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
# The following definition for typedargslist is equivalent to this set of rules:
#
# arguments = argument (',' argument)*
# argument = tfpdef ['=' test]
# kwargs = '**' tname [',']
# args = '*' [tname_star]
# kwonly_kwargs = (',' argument)* [',' [kwargs]]
# args_kwonly_kwargs = args kwonly_kwargs | kwargs
# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
# typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
# typedargslist = arguments ',' '/' [',' [typedargslist_no_posonly]] | typedargslist_no_posonly
#
# It needs to be fully expanded to allow our LL(1) parser to work on it.
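#
# For illustration, the expanded rule below accepts parameter lists such as:
#
#   def f(a, b=1): ...
#   def f(a, /, b, *args, c=2, **kwargs): ...
#   def f(*, a, b=3): ...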
typedargslist: tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [
',' [((tfpdef ['=' test] ',')* ('*' [tname_star] (',' tname ['=' test])*
[',' ['**' tname [',']]] | '**' tname [','])
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])]
] | ((tfpdef ['=' test] ',')* ('*' [tname_star] (',' tname ['=' test])*
[',' ['**' tname [',']]] | '**' tname [','])
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
tname: NAME [':' test]
tname_star: NAME [':' (test|star_expr)]
tfpdef: tname | '(' tfplist ')'
tfplist: tfpdef (',' tfpdef)* [',']
# The following definition for varargslist is equivalent to this set of rules:
#
# arguments = argument (',' argument )*
# argument = vfpdef ['=' test]
# kwargs = '**' vname [',']
# args = '*' [vname]
# kwonly_kwargs = (',' argument )* [',' [kwargs]]
# args_kwonly_kwargs = args kwonly_kwargs | kwargs
# poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
# varargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
# varargslist = arguments ',' '/' [',' [varargslist_no_posonly]] | varargslist_no_posonly
#
# It needs to be fully expanded to allow our LL(1) parser to work on it.
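#
# For illustration, the expanded rule below accepts lambda parameter lists such as:
#
#   lambda a, b=1: a + b
#   lambda a, /, *args, b=2, **kwargs: a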
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [
((vfpdef ['=' test] ',')* ('*' [vname] (',' vname ['=' test])*
[',' ['**' vname [',']]] | '**' vname [','])
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
]] | ((vfpdef ['=' test] ',')*
('*' [vname] (',' vname ['=' test])* [',' ['**' vname [',']]]| '**' vname [','])
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
vname: NAME
vfpdef: vname | '(' vfplist ')'
vfplist: vfpdef (',' vfpdef)* [',']
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (type_stmt | expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions are enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist_star_expr]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test | ',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
type_stmt: "type" NAME [typeparams] '=' test
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt
async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist_star_expr ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' ['*'] [test [(',' | 'as') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test
namedexpr_test: asexpr_test [':=' asexpr_test]
# This is not actually a real rule; because the parser is very limited
# in how it handles match/case rules, we insert a virtual case
# (<expr> as <expr>) as a valid expression. Until a better approach is
# found, the only side effect of this seems to be allowing more input
# to be parsed (input which would then fail in the AST stage).
asexpr_test: test ['as' test]
test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: [AWAIT] atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_gexp] ')' |
'[' [listmaker] ']' |
'{' [dictsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | (STRING | fstring)+ | '.' '.' '.')
listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] )
testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: (subscript|star_expr) (',' (subscript|star_expr))* [',']
subscript: test [':=' test] | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictsetmaker: ( ((test ':' asexpr_test | '**' expr)
(comp_for | (',' (test ':' asexpr_test | '**' expr))* [','])) |
((test [':=' test] | star_expr)
(comp_for | (',' (test [':=' test] | star_expr))* [','])) )
classdef: 'class' NAME [typeparams] ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test ':=' test [comp_for] |
test 'as' test |
test '=' asexpr_test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
comp_for: [ASYNC] 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' old_test [comp_iter]
# As noted above, testlist_safe extends the syntax allowed in list
# comprehensions and generators. We can't use it indiscriminately in all
# derivations using a comp_for-like pattern because the testlist_safe derivation
# contains a comma, which clashes with the trailing comma in arglist.
#
# This was an issue because the parser would not follow the correct derivation
# when parsing syntactically valid Python code. Since testlist_safe was created
# specifically to handle list comprehensions and generator expressions enclosed
# with parentheses, it's safe to only use it in those. That avoids the issue; we
# can parse code like set(x for x in [],).
#
# The syntax supported by this set of rules is not a valid Python 3 syntax,
# hence the prefix "old".
#
# See https://bugs.python.org/issue27494
old_comp_iter: old_comp_for | old_comp_if
old_comp_for: [ASYNC] 'for' exprlist 'in' testlist_safe [old_comp_iter]
old_comp_if: 'if' old_test [old_comp_iter]
testlist1: test (',' test)*
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr
# 3.10 match statement definition
# PS: normally the grammar is much, much more restricted, but for now,
# rather than trying to encode the exact same DSL in an LL(1) parser,
# we just accept an expression and let the ast.parse() step of safe
# mode reject invalid syntax.
# The reason it is more restricted is that patterns are a sort of DSL
# (more advanced than our LHS on assignments, but still a very limited
# Python subset). They are not really expressions, but that does not
# matter: if we can parse them, that is enough to reformat them.
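# For illustration, this permissive grammar accepts:
#
#   match command.split():
#       case [action]: ...
#       case [action, obj]: ...
#
# and relies on the later ast.parse() check to reject anything that is not
# valid match-statement syntax.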
match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT
# This is more permissive than the actual version. For example it
# accepts `match *something:`, even though single-item starred expressions
# are forbidden.
subject_expr: (namedexpr_test|star_expr) (',' (namedexpr_test|star_expr))* [',']
# cases
case_block: "case" patterns [guard] ':' suite
guard: 'if' namedexpr_test
patterns: pattern (',' pattern)* [',']
pattern: (expr|star_expr) ['as' expr]
fstring: FSTRING_START fstring_middle* FSTRING_END
fstring_middle: fstring_replacement_field | FSTRING_MIDDLE
fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" NAME ] [ ':' fstring_format_spec* ] '}'
fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field


@@ -0,0 +1,254 @@
A. HISTORY OF THE SOFTWARE
==========================
Python was created in the early 1990s by Guido van Rossum at Stichting
Mathematisch Centrum (CWI, see https://www.cwi.nl) in the Netherlands
as a successor of a language called ABC. Guido remains Python's
principal author, although it includes many contributions from others.
In 1995, Guido continued his work on Python at the Corporation for
National Research Initiatives (CNRI, see https://www.cnri.reston.va.us)
in Reston, Virginia where he released several versions of the
software.
In May 2000, Guido and the Python core development team moved to
BeOpen.com to form the BeOpen PythonLabs team. In October of the same
year, the PythonLabs team moved to Digital Creations, which became
Zope Corporation. In 2001, the Python Software Foundation (PSF, see
https://www.python.org/psf/) was formed, a non-profit organization
created specifically to own Python-related Intellectual Property.
Zope Corporation was a sponsoring member of the PSF.
All Python releases are Open Source (see https://opensource.org for
the Open Source Definition). Historically, most, but not all, Python
releases have also been GPL-compatible; the table below summarizes
the various releases.
Release         Derived     Year        Owner       GPL-
                from                                compatible? (1)

0.9.0 thru 1.2              1991-1995   CWI         yes
1.3 thru 1.5.2  1.2         1995-1999   CNRI        yes
1.6             1.5.2       2000        CNRI        no
2.0             1.6         2000        BeOpen.com  no
1.6.1           1.6         2001        CNRI        yes (2)
2.1             2.0+1.6.1   2001        PSF         no
2.0.1           2.0+1.6.1   2001        PSF         yes
2.1.1           2.1+2.0.1   2001        PSF         yes
2.1.2           2.1.1       2002        PSF         yes
2.1.3           2.1.2       2002        PSF         yes
2.2 and above   2.1.1       2001-now    PSF         yes
Footnotes:
(1) GPL-compatible doesn't mean that we're distributing Python under
the GPL. All Python licenses, unlike the GPL, let you distribute
a modified version without making your changes open source. The
GPL-compatible licenses make it possible to combine Python with
other software that is released under the GPL; the others don't.
(2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
because its license has a choice of law clause. According to
CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
is "not incompatible" with the GPL.
Thanks to the many outside volunteers who have worked under Guido's
direction to make these releases possible.
B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
===============================================================
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF hereby
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
analyze, test, perform and/or display publicly, prepare derivative works,
distribute, and otherwise use Python alone or in any derivative version,
provided, however, that PSF's License Agreement and PSF's notice of copyright,
i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Python Software Foundation; All
Rights Reserved" are retained in Python alone or in any derivative version
prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.
4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
-------------------------------------------
BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
Individual or Organization ("Licensee") accessing and otherwise using
this software in source or binary form and its associated
documentation ("the Software").
2. Subject to the terms and conditions of this BeOpen Python License
Agreement, BeOpen hereby grants Licensee a non-exclusive,
royalty-free, world-wide license to reproduce, analyze, test, perform
and/or display publicly, prepare derivative works, distribute, and
otherwise use the Software alone or in any derivative version,
provided, however, that the BeOpen Python License is retained in the
Software, alone or in any derivative version prepared by Licensee.
3. BeOpen is making the Software available to Licensee on an "AS IS"
basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
5. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
6. This License Agreement shall be governed by and interpreted in all
respects by the law of the State of California, excluding conflict of
law provisions. Nothing in this License Agreement shall be deemed to
create any relationship of agency, partnership, or joint venture
between BeOpen and Licensee. This License Agreement does not grant
permission to use BeOpen trademarks or trade names in a trademark
sense to endorse or promote products or services of Licensee, or any
third party. As an exception, the "BeOpen Python" logos available at
http://www.pythonlabs.com/logos.html may be used according to the
permissions granted on that web page.
7. By copying, installing or otherwise using the software, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.
CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
---------------------------------------
1. This LICENSE AGREEMENT is between the Corporation for National
Research Initiatives, having an office at 1895 Preston White Drive,
Reston, VA 20191 ("CNRI"), and the Individual or Organization
("Licensee") accessing and otherwise using Python 1.6.1 software in
source or binary form and its associated documentation.
2. Subject to the terms and conditions of this License Agreement, CNRI
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python 1.6.1
alone or in any derivative version, provided, however, that CNRI's
License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
1995-2001 Corporation for National Research Initiatives; All Rights
Reserved" are retained in Python 1.6.1 alone or in any derivative
version prepared by Licensee. Alternately, in lieu of CNRI's License
Agreement, Licensee may substitute the following text (omitting the
quotes): "Python 1.6.1 is made available subject to the terms and
conditions in CNRI's License Agreement. This Agreement together with
Python 1.6.1 may be located on the Internet using the following
unique, persistent identifier (known as a handle): 1895.22/1013. This
Agreement may also be obtained from a proxy server on the Internet
using the following URL: http://hdl.handle.net/1895.22/1013".
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python 1.6.1 or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python 1.6.1.
4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. This License Agreement shall be governed by the federal
intellectual property law of the United States, including without
limitation the federal copyright law, and, to the extent such
U.S. federal law does not apply, by the law of the Commonwealth of
Virginia, excluding Virginia's conflict of law provisions.
Notwithstanding the foregoing, with regard to derivative works based
on Python 1.6.1 that incorporate non-separable material that was
previously distributed under the GNU General Public License (GPL), the
law of the Commonwealth of Virginia shall govern this License
Agreement only as to issues arising under or with respect to
Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
License Agreement shall be deemed to create any relationship of
agency, partnership, or joint venture between CNRI and Licensee. This
License Agreement does not grant permission to use CNRI trademarks or
trade name in a trademark sense to endorse or promote products or
services of Licensee, or any third party.
8. By clicking on the "ACCEPT" button where indicated, or by copying,
installing or otherwise using Python 1.6.1, Licensee agrees to be
bound by the terms and conditions of this License Agreement.
ACCEPT
CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
--------------------------------------------------
Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
The Netherlands. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.
STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.


@@ -0,0 +1,28 @@
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# A grammar to describe tree matching patterns.
# Not shown here:
# - 'TOKEN' stands for any token (leaf node)
# - 'any' stands for any node (leaf or interior)
# With 'any' we can still specify the sub-structure.
# The start symbol is 'Matcher'.
Matcher: Alternatives ENDMARKER
Alternatives: Alternative ('|' Alternative)*
Alternative: (Unit | NegatedUnit)+
Unit: [NAME '='] ( STRING [Repeater]
| NAME [Details] [Repeater]
| '(' Alternatives ')' [Repeater]
| '[' Alternatives ']'
)
NegatedUnit: 'not' (STRING | NAME [Details] | '(' Alternatives ')')
Repeater: '*' | '+' | '{' NUMBER [',' NUMBER] '}'
Details: '<' Alternatives '>'


@@ -0,0 +1,24 @@
A subset of lib2to3 taken from Python 3.7.0b2. Commit hash:
9c17e3a1987004b8bcfbe423953aad84493a7984
Reasons for forking:
- consistent handling of f-strings for users of Python < 3.6.2
- backport of BPO-33064 that fixes parsing files with trailing commas after \*args and
\*\*kwargs
- backport of GH-6143 that restores the ability to reformat legacy usage of `async`
- support all types of string literals
- better ability to debug (better reprs)
- INDENT and DEDENT don't hold whitespace and comment prefixes
- ability to Cythonize
Change Log:
- Changes default logger used by Driver
- Backported the following upstream parser changes:
- "bpo-42381: Allow walrus in set literals and set comprehensions (GH-23332)"
https://github.com/python/cpython/commit/cae60187cf7a7b26281d012e1952fafe4e2e97e9
- "bpo-42316: Allow unparenthesized walrus operator in indexes (GH-23317)"
https://github.com/python/cpython/commit/b0aba1fcdc3da952698d99aec2334faa79a8b68c
- Tweaks to help mypyc compile faster code (including inlining type information,
"Final-ing", etc.)


@@ -0,0 +1 @@
# empty


@@ -0,0 +1,4 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""The pgen2 package."""


@@ -0,0 +1,256 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# mypy: ignore-errors
"""Convert graminit.[ch] spit out by pgen to Python code.
Pgen is the Python parser generator. It is useful to quickly create a
parser from a grammar file in Python's grammar notation. But I don't
want my parsers to be written in C (yet), so I'm translating the
parsing tables to Python data structures and writing a Python parse
engine.
Note that the token numbers are constants determined by the standard
Python tokenizer. The standard token module defines these numbers and
their names (the names are not used much). The token numbers are
hardcoded into the Python tokenizer and into pgen. A Python
implementation of the Python tokenizer is also available, in the
standard tokenize module.
On the other hand, symbol numbers (representing the grammar's
non-terminals) are assigned by pgen based on the actual grammar
input.
Note: this module is pretty much obsolete; the pgen module generates
equivalent grammar tables directly from the Grammar.txt input file
without having to invoke the Python pgen C program.
"""
# Python imports
import re
# Local imports
from pgen2 import grammar, token
class Converter(grammar.Grammar):
"""Grammar subclass that reads classic pgen output files.
The run() method reads the tables as produced by the pgen parser
generator, typically contained in two C files, graminit.h and
graminit.c. The other methods are for internal use only.
See the base class for more documentation.
"""
def run(self, graminit_h, graminit_c):
"""Load the grammar tables from the text files written by pgen."""
self.parse_graminit_h(graminit_h)
self.parse_graminit_c(graminit_c)
self.finish_off()
def parse_graminit_h(self, filename):
"""Parse the .h file written by pgen. (Internal)
This file is a sequence of #define statements defining the
nonterminals of the grammar as numbers. We build two tables
mapping the numbers to names and back.
"""
try:
f = open(filename)
except OSError as err:
print(f"Can't open {filename}: {err}")
return False
self.symbol2number = {}
self.number2symbol = {}
lineno = 0
for line in f:
lineno += 1
mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
if not mo and line.strip():
print(f"{filename}({lineno}): can't parse {line.strip()}")
elif mo:  # skip blank or non-matching lines instead of crashing on mo.groups()
symbol, number = mo.groups()
number = int(number)
assert symbol not in self.symbol2number
assert number not in self.number2symbol
self.symbol2number[symbol] = number
self.number2symbol[number] = symbol
return True
def parse_graminit_c(self, filename):
"""Parse the .c file written by pgen. (Internal)
The file looks as follows. The first two lines are always this:
#include "pgenheaders.h"
#include "grammar.h"
After that come four blocks:
1) one or more state definitions
2) a table defining dfas
3) a table defining labels
4) a struct defining the grammar
A state definition has the following form:
- one or more arc arrays, each of the form:
static arc arcs_<n>_<m>[<k>] = {
{<i>, <j>},
...
};
- followed by a state array, of the form:
static state states_<s>[<t>] = {
{<k>, arcs_<n>_<m>},
...
};
"""
try:
f = open(filename)
except OSError as err:
print(f"Can't open {filename}: {err}")
return False
# The code below essentially uses f's iterator-ness!
lineno = 0
# Expect the two #include lines
lineno, line = lineno + 1, next(f)
assert line == '#include "pgenheaders.h"\n', (lineno, line)
lineno, line = lineno + 1, next(f)
assert line == '#include "grammar.h"\n', (lineno, line)
# Parse the state definitions
lineno, line = lineno + 1, next(f)
allarcs = {}
states = []
while line.startswith("static arc "):
while line.startswith("static arc "):
mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", line)
assert mo, (lineno, line)
n, m, k = list(map(int, mo.groups()))
arcs = []
for _ in range(k):
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+{(\d+), (\d+)},$", line)
assert mo, (lineno, line)
i, j = list(map(int, mo.groups()))
arcs.append((i, j))
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
allarcs[(n, m)] = arcs
lineno, line = lineno + 1, next(f)
mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
assert mo, (lineno, line)
s, t = list(map(int, mo.groups()))
assert s == len(states), (lineno, line)
state = []
for _ in range(t):
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
assert mo, (lineno, line)
k, n, m = list(map(int, mo.groups()))
arcs = allarcs[n, m]
assert k == len(arcs), (lineno, line)
state.append(arcs)
states.append(state)
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
lineno, line = lineno + 1, next(f)
self.states = states
# Parse the dfas
dfas = {}
mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
assert mo, (lineno, line)
ndfas = int(mo.group(1))
for i in range(ndfas):
lineno, line = lineno + 1, next(f)
mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', line)
assert mo, (lineno, line)
symbol = mo.group(2)
number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))
assert self.symbol2number[symbol] == number, (lineno, line)
assert self.number2symbol[number] == symbol, (lineno, line)
assert x == 0, (lineno, line)
state = states[z]
assert y == len(state), (lineno, line)
lineno, line = lineno + 1, next(f)
mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
assert mo, (lineno, line)
first = {}
rawbitset = eval(mo.group(1))
for i, c in enumerate(rawbitset):
byte = ord(c)
for j in range(8):
if byte & (1 << j):
first[i * 8 + j] = 1
dfas[number] = (state, first)
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
self.dfas = dfas
# Parse the labels
labels = []
lineno, line = lineno + 1, next(f)
mo = re.match(r"static label labels\[(\d+)\] = {$", line)
assert mo, (lineno, line)
nlabels = int(mo.group(1))
for i in range(nlabels):
lineno, line = lineno + 1, next(f)
mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
assert mo, (lineno, line)
x, y = mo.groups()
x = int(x)
if y == "0":
y = None
else:
y = eval(y)
labels.append((x, y))
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
self.labels = labels
# Parse the grammar struct
lineno, line = lineno + 1, next(f)
assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+(\d+),$", line)
assert mo, (lineno, line)
ndfas = int(mo.group(1))
assert ndfas == len(self.dfas)
lineno, line = lineno + 1, next(f)
assert line == "\tdfas,\n", (lineno, line)
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+{(\d+), labels},$", line)
assert mo, (lineno, line)
nlabels = int(mo.group(1))
assert nlabels == len(self.labels), (lineno, line)
lineno, line = lineno + 1, next(f)
mo = re.match(r"\s+(\d+)$", line)
assert mo, (lineno, line)
start = int(mo.group(1))
assert start in self.number2symbol, (lineno, line)
self.start = start
lineno, line = lineno + 1, next(f)
assert line == "};\n", (lineno, line)
try:
lineno, line = lineno + 1, next(f)
except StopIteration:
pass
else:
assert 0, (lineno, line)
def finish_off(self):
"""Create additional useful structures. (Internal)."""
self.keywords = {} # map from keyword strings to arc labels
self.tokens = {} # map from numeric token values to arc labels
for ilabel, (type, value) in enumerate(self.labels):
if type == token.NAME and value is not None:
self.keywords[value] = ilabel
elif value is None:
self.tokens[type] = ilabel


@@ -0,0 +1,318 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Parser driver.
This provides a high-level interface to parse a file into a syntax tree.
"""
__author__ = "Guido van Rossum <guido@python.org>"
__all__ = ["Driver", "load_grammar"]
# Python imports
import io
import logging
import os
import pkgutil
import sys
from contextlib import contextmanager
from dataclasses import dataclass, field
from logging import Logger
from typing import IO, Any, Iterable, Iterator, Optional, Union, cast
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pgen2.tokenize import GoodTokenInfo
from blib2to3.pytree import NL
# Pgen imports
from . import grammar, parse, pgen, token, tokenize
Path = Union[str, "os.PathLike[str]"]
@dataclass
class ReleaseRange:
start: int
end: Optional[int] = None
tokens: list[Any] = field(default_factory=list)
def lock(self) -> None:
total_eaten = len(self.tokens)
self.end = self.start + total_eaten
class TokenProxy:
def __init__(self, generator: Any) -> None:
self._tokens = generator
self._counter = 0
self._release_ranges: list[ReleaseRange] = []
@contextmanager
def release(self) -> Iterator["TokenProxy"]:
release_range = ReleaseRange(self._counter)
self._release_ranges.append(release_range)
try:
yield self
finally:
# Lock the last release range to the final position that
# has been eaten.
release_range.lock()
def eat(self, point: int) -> Any:
eaten_tokens = self._release_ranges[-1].tokens
if point < len(eaten_tokens):
return eaten_tokens[point]
else:
while point >= len(eaten_tokens):
token = next(self._tokens)
eaten_tokens.append(token)
return token
def __iter__(self) -> "TokenProxy":
return self
def __next__(self) -> Any:
# If the current position has already been looked ahead (eaten),
# return the cached token; otherwise just advance the underlying
# token producer.
for release_range in self._release_ranges:
assert release_range.end is not None
start, end = release_range.start, release_range.end
if start <= self._counter < end:
token = release_range.tokens[self._counter - start]
break
else:
token = next(self._tokens)
self._counter += 1
return token
def can_advance(self, to: int) -> bool:
# Try to eat; fail if we can't. The eat operation is cached,
# so there is no additional cost of eating here.
try:
self.eat(to)
except StopIteration:
return False
else:
return True
class Driver:
def __init__(self, grammar: Grammar, logger: Optional[Logger] = None) -> None:
self.grammar = grammar
if logger is None:
logger = logging.getLogger(__name__)
self.logger = logger
def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) -> NL:
"""Parse a series of tokens and return the syntax tree."""
# XXX Move the prefix computation into a wrapper around tokenize.
proxy = TokenProxy(tokens)
p = parse.Parser(self.grammar)
p.setup(proxy=proxy)
lineno = 1
column = 0
indent_columns: list[int] = []
type = value = start = end = line_text = None
prefix = ""
for quintuple in proxy:
type, value, start, end, line_text = quintuple
if start != (lineno, column):
assert (lineno, column) <= start, ((lineno, column), start)
s_lineno, s_column = start
if lineno < s_lineno:
prefix += "\n" * (s_lineno - lineno)
lineno = s_lineno
column = 0
if column < s_column:
prefix += line_text[column:s_column]
column = s_column
if type in (tokenize.COMMENT, tokenize.NL):
prefix += value
lineno, column = end
if value.endswith("\n"):
lineno += 1
column = 0
continue
if type == token.OP:
type = grammar.opmap[value]
if debug:
assert type is not None
self.logger.debug(
"%s %r (prefix=%r)", token.tok_name[type], value, prefix
)
if type == token.INDENT:
indent_columns.append(len(value))
_prefix = prefix + value
prefix = ""
value = ""
elif type == token.DEDENT:
_indent_col = indent_columns.pop()
prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col)
if p.addtoken(cast(int, type), value, (prefix, start)):
if debug:
self.logger.debug("Stop.")
break
prefix = ""
if type in {token.INDENT, token.DEDENT}:
prefix = _prefix
lineno, column = end
# FSTRING_MIDDLE is the only token that can end with a newline, and
# `end` will point to the next line. For that case, don't increment lineno.
if value.endswith("\n") and type != token.FSTRING_MIDDLE:
lineno += 1
column = 0
else:
# We never broke out -- EOF is too soon (how can this happen???)
assert start is not None
raise parse.ParseError("incomplete input", type, value, (prefix, start))
assert p.rootnode is not None
return p.rootnode
def parse_stream_raw(self, stream: IO[str], debug: bool = False) -> NL:
"""Parse a stream and return the syntax tree."""
tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
return self.parse_tokens(tokens, debug)
def parse_stream(self, stream: IO[str], debug: bool = False) -> NL:
"""Parse a stream and return the syntax tree."""
return self.parse_stream_raw(stream, debug)
def parse_file(
self, filename: Path, encoding: Optional[str] = None, debug: bool = False
) -> NL:
"""Parse a file and return the syntax tree."""
with open(filename, encoding=encoding) as stream:
return self.parse_stream(stream, debug)
def parse_string(self, text: str, debug: bool = False) -> NL:
"""Parse a string and return the syntax tree."""
tokens = tokenize.generate_tokens(
io.StringIO(text).readline, grammar=self.grammar
)
return self.parse_tokens(tokens, debug)
def _partially_consume_prefix(self, prefix: str, column: int) -> tuple[str, str]:
lines: list[str] = []
current_line = ""
current_column = 0
wait_for_nl = False
for char in prefix:
current_line += char
if wait_for_nl:
if char == "\n":
if current_line.strip() and current_column < column:
res = "".join(lines)
return res, prefix[len(res) :]
lines.append(current_line)
current_line = ""
current_column = 0
wait_for_nl = False
elif char in " \t":
current_column += 1
elif char == "\n":
# unexpected empty line
current_column = 0
elif char == "\f":
current_column = 0
else:
# indent is finished
wait_for_nl = True
return "".join(lines), current_line
def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> str:
head, tail = os.path.splitext(gt)
if tail == ".txt":
tail = ""
name = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
if cache_dir:
return os.path.join(cache_dir, os.path.basename(name))
else:
return name
def load_grammar(
gt: str = "Grammar.txt",
gp: Optional[str] = None,
save: bool = True,
force: bool = False,
logger: Optional[Logger] = None,
) -> Grammar:
"""Load the grammar (maybe from a pickle)."""
if logger is None:
logger = logging.getLogger(__name__)
gp = _generate_pickle_name(gt) if gp is None else gp
if force or not _newer(gp, gt):
g: grammar.Grammar = pgen.generate_grammar(gt)
if save:
try:
g.dump(gp)
except OSError:
# Ignore error, caching is not vital.
pass
else:
g = grammar.Grammar()
g.load(gp)
return g
def _newer(a: str, b: str) -> bool:
"""Inquire whether file a was written since file b."""
if not os.path.exists(a):
return False
if not os.path.exists(b):
return True
return os.path.getmtime(a) >= os.path.getmtime(b)
def load_packaged_grammar(
package: str, grammar_source: str, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
"""Normally, loads a pickled grammar by doing
pkgutil.get_data(package, pickled_grammar)
where *pickled_grammar* is computed from *grammar_source* by adding the
Python version and using a ``.pickle`` extension.
However, if *grammar_source* is an extant file, load_grammar(grammar_source)
is called instead. This facilitates using a packaged grammar file when needed
but preserves load_grammar's automatic regeneration behavior when possible.
"""
if os.path.isfile(grammar_source):
gp = _generate_pickle_name(grammar_source, cache_dir) if cache_dir else None
return load_grammar(grammar_source, gp=gp)
pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
data = pkgutil.get_data(package, pickled_name)
assert data is not None
g = grammar.Grammar()
g.loads(data)
return g
def main(*args: str) -> bool:
"""Main program, when run as a script: produce grammar pickle files.
Calls load_grammar for each argument, a path to a grammar text file.
"""
if not args:
args = tuple(sys.argv[1:])
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
for gt in args:
load_grammar(gt, save=True, force=True)
return True
if __name__ == "__main__":
sys.exit(int(not main()))


@@ -0,0 +1,228 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""This module defines the data structures used to represent a grammar.
These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.
There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.
"""
# Python imports
import os
import pickle
import tempfile
from typing import Any, Optional, TypeVar, Union
# Local imports
from . import token
_P = TypeVar("_P", bound="Grammar")
Label = tuple[int, Optional[str]]
DFA = list[list[tuple[int, int]]]
DFAS = tuple[DFA, dict[int, int]]
Path = Union[str, "os.PathLike[str]"]
class Grammar:
"""Pgen parsing tables conversion class.
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules).
The load() method reads the tables from a pickle file, which is
much faster than the other ways offered by subclasses. The pickle
file is written by calling dump() (after loading the grammar
tables using a subclass). The report() method prints a readable
representation of the tables to stdout, for debugging.
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
states -- a list of DFAs, where each DFA is a list of
states, each state is a list of arcs, and each
arc is a (i, j) pair where i is a label and j is
a state number. The DFA number is the index into
this list. (This name is slightly confusing.)
Final states are represented by a special arc of
the form (0, j) where j is its own state number.
dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list
above, and first is a set of tokens that can
begin this grammar rule (represented by a dict
whose values are always 1).
labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None
or a string; the strings are keywords. The label
number is the index in this list; label numbers
are used to mark state transitions (arcs) in the
DFAs.
start -- the number of the grammar's start symbol.
keywords -- a dict mapping keyword strings to arc labels.
tokens -- a dict mapping token numbers to arc labels.
"""
def __init__(self) -> None:
self.symbol2number: dict[str, int] = {}
self.number2symbol: dict[int, str] = {}
self.states: list[DFA] = []
self.dfas: dict[int, DFAS] = {}
self.labels: list[Label] = [(0, "EMPTY")]
self.keywords: dict[str, int] = {}
self.soft_keywords: dict[str, int] = {}
self.tokens: dict[int, int] = {}
self.symbol2label: dict[str, int] = {}
self.version: tuple[int, int] = (0, 0)
self.start = 256
# Python 3.7+ parses async as a keyword, not an identifier
self.async_keywords = False
def dump(self, filename: Path) -> None:
"""Dump the grammar tables to a pickle file."""
# mypyc generates objects that don't have a __dict__, but they
# do have __getstate__ methods that will return an equivalent
# dictionary
if hasattr(self, "__dict__"):
d = self.__dict__
else:
d = self.__getstate__() # type: ignore
with tempfile.NamedTemporaryFile(
dir=os.path.dirname(filename), delete=False
) as f:
pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
os.replace(f.name, filename)
def _update(self, attrs: dict[str, Any]) -> None:
for k, v in attrs.items():
setattr(self, k, v)
def load(self, filename: Path) -> None:
"""Load the grammar tables from a pickle file."""
with open(filename, "rb") as f:
d = pickle.load(f)
self._update(d)
def loads(self, pkl: bytes) -> None:
"""Load the grammar tables from a pickle bytes object."""
self._update(pickle.loads(pkl))
def copy(self: _P) -> _P:
"""
Copy the grammar.
"""
new = self.__class__()
for dict_attr in (
"symbol2number",
"number2symbol",
"dfas",
"keywords",
"soft_keywords",
"tokens",
"symbol2label",
):
setattr(new, dict_attr, getattr(self, dict_attr).copy())
new.labels = self.labels[:]
new.states = self.states[:]
new.start = self.start
new.version = self.version
new.async_keywords = self.async_keywords
return new
def report(self) -> None:
"""Dump the grammar tables to standard output, for debugging."""
from pprint import pprint
print("s2n")
pprint(self.symbol2number)
print("n2s")
pprint(self.number2symbol)
print("states")
pprint(self.states)
print("dfas")
pprint(self.dfas)
print("labels")
pprint(self.labels)
print("start", self.start)
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
@= ATEQUAL
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
:= COLONEQUAL
! BANG
"""
opmap = {}
for line in opmap_raw.splitlines():
if line:
op, name = line.split()
opmap[op] = getattr(token, name)


@@ -0,0 +1,66 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Safely evaluate Python string literals without using eval()."""
import re
from typing import Match
simple_escapes: dict[str, str] = {
"a": "\a",
"b": "\b",
"f": "\f",
"n": "\n",
"r": "\r",
"t": "\t",
"v": "\v",
"'": "'",
'"': '"',
"\\": "\\",
}
def escape(m: Match[str]) -> str:
all, tail = m.group(0, 1)
assert all.startswith("\\")
esc = simple_escapes.get(tail)
if esc is not None:
return esc
if tail.startswith("x"):
hexes = tail[1:]
if len(hexes) < 2:
raise ValueError("invalid hex string escape ('\\%s')" % tail)
try:
i = int(hexes, 16)
except ValueError:
raise ValueError("invalid hex string escape ('\\%s')" % tail) from None
else:
try:
i = int(tail, 8)
except ValueError:
raise ValueError("invalid octal string escape ('\\%s')" % tail) from None
return chr(i)
def evalString(s: str) -> str:
assert s.startswith("'") or s.startswith('"'), repr(s[:1])
q = s[0]
if s[:3] == q * 3:
q = q * 3
assert s.endswith(q), repr(s[-len(q) :])
assert len(s) >= 2 * len(q)
s = s[len(q) : -len(q)]
return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)
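# For example:
#
#   evalString(r"'\x41\101\n'") == "AA\n"
#   evalString('"""quoted"""') == "quoted"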
def test() -> None:
for i in range(256):
c = chr(i)
s = repr(c)
e = evalString(s)
if e != c:
print(i, c, s, e)
if __name__ == "__main__":
test()


@@ -0,0 +1,399 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Parser engine for the grammar tables generated by pgen.
The grammar table must be loaded first.
See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, Union, cast
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import NL, Context, Leaf, Node, RawNode, convert
# Local imports
from . import grammar, token, tokenize
if TYPE_CHECKING:
from blib2to3.pgen2.driver import TokenProxy
Results = dict[str, NL]
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
DFA = list[list[tuple[int, int]]]
DFAS = tuple[DFA, dict[int, int]]
def lam_sub(grammar: Grammar, node: RawNode) -> NL:
assert node[3] is not None
return Node(type=node[0], children=node[3], context=node[2])
# A placeholder node, used when parser is backtracking.
DUMMY_NODE = (-1, None, None, None)
def stack_copy(
stack: list[tuple[DFAS, int, RawNode]],
) -> list[tuple[DFAS, int, RawNode]]:
"""Nodeless stack copy."""
return [(dfa, label, DUMMY_NODE) for dfa, label, _ in stack]
class Recorder:
def __init__(self, parser: "Parser", ilabels: list[int], context: Context) -> None:
self.parser = parser
self._ilabels = ilabels
self.context = context  # does not really matter here
self._dead_ilabels: set[int] = set()
self._start_point = self.parser.stack
self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}
@property
def ilabels(self) -> set[int]:
return self._dead_ilabels.symmetric_difference(self._ilabels)
@contextmanager
def switch_to(self, ilabel: int) -> Iterator[None]:
with self.backtrack():
self.parser.stack = self._points[ilabel]
try:
yield
except ParseError:
self._dead_ilabels.add(ilabel)
finally:
self.parser.stack = self._start_point
@contextmanager
def backtrack(self) -> Iterator[None]:
"""
Use node-level-invariant operations for the basic parsing operations
(push/pop/shift). These will still operate on the stack, but they won't
create any new nodes or modify the contents of any existing nodes.
This saves a lot of time when we are backtracking, since we want to
restore the initial state as quickly as possible, which can only be
done with as few mutations as possible.
"""
is_backtracking = self.parser.is_backtracking
try:
self.parser.is_backtracking = True
yield
finally:
self.parser.is_backtracking = is_backtracking
def add_token(self, tok_type: int, tok_val: str, raw: bool = False) -> None:
func: Callable[..., Any]
if raw:
func = self.parser._addtoken
else:
func = self.parser.addtoken
for ilabel in self.ilabels:
with self.switch_to(ilabel):
args = [tok_type, tok_val, self.context]
if raw:
args.insert(0, ilabel)
func(*args)
def determine_route(
self, value: Optional[str] = None, force: bool = False
) -> Optional[int]:
alive_ilabels = self.ilabels
if len(alive_ilabels) == 0:
*_, most_successful_ilabel = self._dead_ilabels
raise ParseError("bad input", most_successful_ilabel, value, self.context)
ilabel, *rest = alive_ilabels
if force or not rest:
return ilabel
else:
return None
class ParseError(Exception):
"""Exception to signal the parser is stuck."""
def __init__(
self, msg: str, type: Optional[int], value: Optional[str], context: Context
) -> None:
Exception.__init__(
self, f"{msg}: type={type!r}, value={value!r}, context={context!r}"
)
self.msg = msg
self.type = type
self.value = value
self.context = context
class Parser:
"""Parser engine.
The proper usage sequence is:
p = Parser(grammar, [converter]) # create instance
p.setup([start]) # prepare for parsing
<for each input token>:
if p.addtoken(...): # parse a token; may raise ParseError
break
root = p.rootnode # root of abstract syntax tree
A Parser instance may be reused by calling setup() repeatedly.
A Parser instance contains state pertaining to the current token
sequence, and should not be used concurrently by different threads
to parse separate token sequences.
See driver.py for how to get input tokens by tokenizing a file or
string.
Parsing is complete when addtoken() returns True; the root of the
abstract syntax tree can then be retrieved from the rootnode
instance variable. When a syntax error occurs, addtoken() raises
the ParseError exception. There is no error recovery; the parser
cannot be used after a syntax error was reported (but it can be
reinitialized by calling setup()).
"""
def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
"""Constructor.
The grammar argument is a grammar.Grammar instance; see the
grammar module for more information.
The parser is not ready yet for parsing; you must call the
setup() method to get it started.
The optional convert argument is a function mapping concrete
syntax tree nodes to abstract syntax tree nodes. If not
given, no conversion is done and the syntax tree produced is
the concrete syntax tree. If given, it must be a function of
two arguments, the first being the grammar (a grammar.Grammar
instance), and the second being the concrete syntax tree node
to be converted. The syntax tree is converted from the bottom
up.
**post-note: the convert argument is ignored since for Black's
usage, convert will always be blib2to3.pytree.convert. Allowing
this to be dynamic hurts mypyc's ability to use early binding.
These docs are left for historical and informational value.
A concrete syntax tree node is a (type, value, context, nodes)
tuple, where type is the node type (a token or symbol number),
value is None for symbols and a string for tokens, context is
None or an opaque value used for error reporting (typically a
(lineno, offset) pair), and nodes is a list of children for
symbols, and None for tokens.
An abstract syntax tree node may be anything; this is entirely
up to the converter function.
"""
self.grammar = grammar
# See note in docstring above. TL;DR this is ignored.
self.convert = convert or lam_sub
self.is_backtracking = False
self.last_token: Optional[int] = None
def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
"""Prepare for parsing.
This *must* be called before starting to parse.
The optional argument is an alternative start symbol; it
defaults to the grammar's start symbol.
You can use a Parser instance to parse any number of programs;
each time you call setup() the parser is reset to an initial
state determined by the (implicit or explicit) start symbol.
"""
if start is None:
start = self.grammar.start
# Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, value, context, children),
# where children is a list of nodes or None, and context may be None.
newnode: RawNode = (start, None, None, [])
stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack: list[tuple[DFAS, int, RawNode]] = [stackentry]
self.rootnode: Optional[NL] = None
self.used_names: set[str] = set()
self.proxy = proxy
self.last_token = None
def addtoken(self, type: int, value: str, context: Context) -> bool:
"""Add a token; return True iff this is the end of the program."""
# Map from token to label
ilabels = self.classify(type, value, context)
assert len(ilabels) >= 1
# If we have only one state to advance, we'll directly
# take it as is.
if len(ilabels) == 1:
[ilabel] = ilabels
return self._addtoken(ilabel, type, value, context)
# If there are multiple states which we can advance to (this only
# happens with soft keywords), then we will try all of them in
# parallel and, as soon as one state can reach further than the
# rest, we'll choose that one. This is a pretty hacky and
# hopefully temporary algorithm.
#
# For a more detailed explanation, check out this post:
# https://tree.science/what-the-backtracking.html
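#
# For example: for the input `match = 1`, the NAME `match` is classified
# both as a plain NAME and as the soft keyword that starts a match
# statement; only after peeking at the next token (`=`) does the keyword
# route hit a ParseError, leaving the plain-NAME route to be chosen.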
with self.proxy.release() as proxy:
counter, force = 0, False
recorder = Recorder(self, ilabels, context)
recorder.add_token(type, value, raw=True)
next_token_value = value
while recorder.determine_route(next_token_value) is None:
if not proxy.can_advance(counter):
force = True
break
next_token_type, next_token_value, *_ = proxy.eat(counter)
if next_token_type in (tokenize.COMMENT, tokenize.NL):
counter += 1
continue
if next_token_type == tokenize.OP:
next_token_type = grammar.opmap[next_token_value]
recorder.add_token(next_token_type, next_token_value)
counter += 1
ilabel = cast(int, recorder.determine_route(next_token_value, force=force))
assert ilabel is not None
return self._addtoken(ilabel, type, value, context)
def _addtoken(self, ilabel: int, type: int, value: str, context: Context) -> bool:
# Loop until the token is shifted; may raise exceptions
while True:
dfa, state, node = self.stack[-1]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
t = self.grammar.labels[i][0]
if t >= 256:
# See if it's a symbol and if we're in its first set
itsdfa = self.grammar.dfas[t]
itsstates, itsfirst = itsdfa
if ilabel in itsfirst:
# Push a symbol
self.push(t, itsdfa, newstate, context)
break # To continue the outer while loop
elif ilabel == i:
# Look it up in the list of labels
# Shift a token; we're done with it
self.shift(type, value, newstate, context)
# Pop while we are in an accept-only state
state = newstate
while states[state] == [(0, state)]:
self.pop()
if not self.stack:
# Done parsing!
return True
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
self.last_token = type
return False
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
self.pop()
if not self.stack:
# Done parsing, but another token is input
raise ParseError("too much input", type, value, context)
else:
# No success finding a transition
raise ParseError("bad input", type, value, context)
def classify(self, type: int, value: str, context: Context) -> list[int]:
"""Turn a token into a label. (Internal)
Depending on whether the value is a soft-keyword or not,
this function may return multiple labels to choose from."""
if type == token.NAME:
# Keep a listing of all used names
self.used_names.add(value)
# Check for reserved words
if value in self.grammar.keywords:
return [self.grammar.keywords[value]]
elif value in self.grammar.soft_keywords:
assert type in self.grammar.tokens
# Current soft keywords (match, case, type) can only appear at the
# beginning of a statement. So as a shortcut, don't try to treat them
# like keywords in any other context.
# ('_' is also a soft keyword in the real grammar, but for our grammar
# it's just an expression, so we don't need to treat it specially.)
if self.last_token not in (
None,
token.INDENT,
token.DEDENT,
token.NEWLINE,
token.SEMI,
token.COLON,
):
return [self.grammar.tokens[type]]
return [
self.grammar.tokens[type],
self.grammar.soft_keywords[value],
]
ilabel = self.grammar.tokens.get(type)
if ilabel is None:
raise ParseError("bad token", type, value, context)
return [ilabel]
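As a quick illustration of the two-label case described above: with the soft-keyword grammar, the same word parses either as a plain NAME or as a statement keyword depending on position. A minimal sketch, assuming blib2to3 (shipped with Black) and its driver module are importable:
from blib2to3 import pygram
from blib2to3.pgen2 import driver

pygram.initialize(cache_dir=None)
d = driver.Driver(pygram.python_grammar_soft_keywords)
tree_assign = d.parse_string("match = 1\n")  # 'match' used as an ordinary name
tree_match = d.parse_string("match x:\n    case 1:\n        pass\n")  # soft keyword
print(type(tree_assign).__name__, type(tree_match).__name__)  # Node Node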
def shift(self, type: int, value: str, newstate: int, context: Context) -> None:
"""Shift a token. (Internal)"""
if self.is_backtracking:
dfa, state, _ = self.stack[-1]
self.stack[-1] = (dfa, newstate, DUMMY_NODE)
else:
dfa, state, node = self.stack[-1]
rawnode: RawNode = (type, value, context, None)
newnode = convert(self.grammar, rawnode)
assert node[-1] is not None
node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node)
def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
"""Push a nonterminal. (Internal)"""
if self.is_backtracking:
dfa, state, _ = self.stack[-1]
self.stack[-1] = (dfa, newstate, DUMMY_NODE)
self.stack.append((newdfa, 0, DUMMY_NODE))
else:
dfa, state, node = self.stack[-1]
newnode: RawNode = (type, None, context, [])
self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode))
def pop(self) -> None:
"""Pop a nonterminal. (Internal)"""
if self.is_backtracking:
self.stack.pop()
else:
popdfa, popstate, popnode = self.stack.pop()
newnode = convert(self.grammar, popnode)
if self.stack:
dfa, state, node = self.stack[-1]
assert node[-1] is not None
node[-1].append(newnode)
else:
self.rootnode = newnode
self.rootnode.used_names = self.used_names
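For orientation, a minimal sketch of the (type, value, context, children) tuples described in the Parser docstring, converted bottom-up with pytree.convert the way _addtoken() and pop() do. It assumes blib2to3 (shipped with Black) is importable; context here follows pytree's (prefix, (lineno, column)) convention:
from blib2to3 import pygram, pytree
from blib2to3.pgen2 import token

pygram.initialize(cache_dir=None)
gr = pygram.python_grammar

# Tokens: value is a string, children is None.
name_x = pytree.convert(gr, (token.NAME, "x", ("", (1, 0)), None))
newline = pytree.convert(gr, (token.NEWLINE, "\n", ("", (1, 1)), None))

# Symbols: value is None, children is a list of already-converted nodes.
stmt = pytree.convert(
    gr, (pygram.python_symbols.simple_stmt, None, None, [name_x, newline])
)
print(repr(stmt))       # e.g. Node(simple_stmt, [Leaf(NAME, 'x'), Leaf(NEWLINE, '\n')])
print(repr(str(stmt)))  # 'x\n' -- str() reproduces the source exactly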

View File

@@ -0,0 +1,417 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
import os
from typing import IO, Any, Iterator, NoReturn, Optional, Sequence, Union
from blib2to3.pgen2 import grammar, token, tokenize
from blib2to3.pgen2.tokenize import GoodTokenInfo
Path = Union[str, "os.PathLike[str]"]
class PgenGrammar(grammar.Grammar):
pass
class ParserGenerator:
filename: Path
stream: IO[str]
generator: Iterator[GoodTokenInfo]
first: dict[str, Optional[dict[str, int]]]
def __init__(self, filename: Path, stream: Optional[IO[str]] = None) -> None:
close_stream = None
if stream is None:
stream = open(filename, encoding="utf-8")
close_stream = stream.close
self.filename = filename
self.stream = stream
self.generator = tokenize.generate_tokens(stream.readline)
self.gettoken() # Initialize lookahead
self.dfas, self.startsymbol = self.parse()
if close_stream is not None:
close_stream()
self.first = {} # map from symbol name to set of tokens
self.addfirstsets()
def make_grammar(self) -> PgenGrammar:
c = PgenGrammar()
names = list(self.dfas.keys())
names.sort()
names.remove(self.startsymbol)
names.insert(0, self.startsymbol)
for name in names:
i = 256 + len(c.symbol2number)
c.symbol2number[name] = i
c.number2symbol[i] = name
for name in names:
dfa = self.dfas[name]
states = []
for state in dfa:
arcs = []
for label, next in sorted(state.arcs.items()):
arcs.append((self.make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
states.append(arcs)
c.states.append(states)
c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
c.start = c.symbol2number[self.startsymbol]
return c
def make_first(self, c: PgenGrammar, name: str) -> dict[int, int]:
rawfirst = self.first[name]
assert rawfirst is not None
first = {}
for label in sorted(rawfirst):
ilabel = self.make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
return first
def make_label(self, c: PgenGrammar, label: str) -> int:
# XXX Maybe this should be a method on a subclass of converter?
ilabel = len(c.labels)
if label[0].isalpha():
# Either a symbol name or a named token
if label in c.symbol2number:
# A symbol name (a non-terminal)
if label in c.symbol2label:
return c.symbol2label[label]
else:
c.labels.append((c.symbol2number[label], None))
c.symbol2label[label] = ilabel
return ilabel
else:
# A named token (NAME, NUMBER, STRING)
itoken = getattr(token, label, None)
assert isinstance(itoken, int), label
assert itoken in token.tok_name, label
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
else:
# Either a keyword or an operator
assert label[0] in ('"', "'"), label
value = eval(label)
if value[0].isalpha():
if label[0] == '"':
keywords = c.soft_keywords
else:
keywords = c.keywords
# A keyword
if value in keywords:
return keywords[value]
else:
c.labels.append((token.NAME, value))
keywords[value] = ilabel
return ilabel
else:
# An operator (any non-numeric token)
itoken = grammar.opmap[value] # Fails if unknown token
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
def addfirstsets(self) -> None:
names = list(self.dfas.keys())
names.sort()
for name in names:
if name not in self.first:
self.calcfirst(name)
# print name, self.first[name].keys()
def calcfirst(self, name: str) -> None:
dfa = self.dfas[name]
self.first[name] = None # dummy to detect left recursion
state = dfa[0]
totalset: dict[str, int] = {}
overlapcheck = {}
for label in state.arcs:
if label in self.dfas:
if label in self.first:
fset = self.first[label]
if fset is None:
raise ValueError("recursion for rule %r" % name)
else:
self.calcfirst(label)
fset = self.first[label]
assert fset is not None
totalset.update(fset)
overlapcheck[label] = fset
else:
totalset[label] = 1
overlapcheck[label] = {label: 1}
inverse: dict[str, str] = {}
for label, itsfirst in overlapcheck.items():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError(
"rule %s is ambiguous; %s is in the first sets of %s as well"
" as %s" % (name, symbol, label, inverse[symbol])
)
inverse[symbol] = label
self.first[name] = totalset
def parse(self) -> tuple[dict[str, list["DFAState"]], str]:
dfas = {}
startsymbol: Optional[str] = None
# MSTART: (NEWLINE | RULE)* ENDMARKER
while self.type != token.ENDMARKER:
while self.type == token.NEWLINE:
self.gettoken()
# RULE: NAME ':' RHS NEWLINE
name = self.expect(token.NAME)
self.expect(token.OP, ":")
a, z = self.parse_rhs()
self.expect(token.NEWLINE)
# self.dump_nfa(name, a, z)
dfa = self.make_dfa(a, z)
# self.dump_dfa(name, dfa)
# oldlen = len(dfa)
self.simplify_dfa(dfa)
# newlen = len(dfa)
dfas[name] = dfa
# print name, oldlen, newlen
if startsymbol is None:
startsymbol = name
assert startsymbol is not None
return dfas, startsymbol
def make_dfa(self, start: "NFAState", finish: "NFAState") -> list["DFAState"]:
# To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then we do some
# state reduction. Sets are represented as dicts with the NFA
# states as keys and 1 as the value.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def closure(state: NFAState) -> dict[NFAState, int]:
base: dict[NFAState, int] = {}
addclosure(state, base)
return base
def addclosure(state: NFAState, base: dict[NFAState, int]) -> None:
assert isinstance(state, NFAState)
if state in base:
return
base[state] = 1
for label, next in state.arcs:
if label is None:
addclosure(next, base)
states = [DFAState(closure(start), finish)]
for state in states: # NB states grows while we're iterating
arcs: dict[str, dict[NFAState, int]] = {}
for nfastate in state.nfaset:
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
for label, nfaset in sorted(arcs.items()):
for st in states:
if st.nfaset == nfaset:
break
else:
st = DFAState(nfaset, finish)
states.append(st)
state.addarc(st, label)
return states # List of DFAState instances; first one is start
def dump_nfa(self, name: str, start: "NFAState", finish: "NFAState") -> None:
print("Dump of NFA for", name)
todo = [start]
for i, state in enumerate(todo):
print(" State", i, state is finish and "(final)" or "")
for label, next in state.arcs:
if next in todo:
j = todo.index(next)
else:
j = len(todo)
todo.append(next)
if label is None:
print(" -> %d" % j)
else:
print(" %s -> %d" % (label, j))
def dump_dfa(self, name: str, dfa: Sequence["DFAState"]) -> None:
print("Dump of DFA for", name)
for i, state in enumerate(dfa):
print(" State", i, state.isfinal and "(final)" or "")
for label, next in sorted(state.arcs.items()):
print(" %s -> %d" % (label, dfa.index(next)))
def simplify_dfa(self, dfa: list["DFAState"]) -> None:
# This is not theoretically optimal, but works well enough.
# Algorithm: repeatedly look for two states that have the same
# set of arcs (same labels pointing to the same nodes) and
# unify them, until things stop changing.
# dfa is a list of DFAState instances
changes = True
while changes:
changes = False
for i, state_i in enumerate(dfa):
for j in range(i + 1, len(dfa)):
state_j = dfa[j]
if state_i == state_j:
# print " unify", i, j
del dfa[j]
for state in dfa:
state.unifystate(state_j, state_i)
changes = True
break
def parse_rhs(self) -> tuple["NFAState", "NFAState"]:
# RHS: ALT ('|' ALT)*
a, z = self.parse_alt()
if self.value != "|":
return a, z
else:
aa = NFAState()
zz = NFAState()
aa.addarc(a)
z.addarc(zz)
while self.value == "|":
self.gettoken()
a, z = self.parse_alt()
aa.addarc(a)
z.addarc(zz)
return aa, zz
def parse_alt(self) -> tuple["NFAState", "NFAState"]:
# ALT: ITEM+
a, b = self.parse_item()
while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
c, d = self.parse_item()
b.addarc(c)
b = d
return a, b
def parse_item(self) -> tuple["NFAState", "NFAState"]:
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
if self.value == "[":
self.gettoken()
a, z = self.parse_rhs()
self.expect(token.OP, "]")
a.addarc(z)
return a, z
else:
a, z = self.parse_atom()
value = self.value
if value not in ("+", "*"):
return a, z
self.gettoken()
z.addarc(a)
if value == "+":
return a, z
else:
return a, a
def parse_atom(self) -> tuple["NFAState", "NFAState"]:
# ATOM: '(' RHS ')' | NAME | STRING
if self.value == "(":
self.gettoken()
a, z = self.parse_rhs()
self.expect(token.OP, ")")
return a, z
elif self.type in (token.NAME, token.STRING):
a = NFAState()
z = NFAState()
a.addarc(z, self.value)
self.gettoken()
return a, z
else:
self.raise_error(
"expected (...) or NAME or STRING, got %s/%s", self.type, self.value
)
raise AssertionError
def expect(self, type: int, value: Optional[Any] = None) -> str:
if self.type != type or (value is not None and self.value != value):
self.raise_error(
"expected %s/%s, got %s/%s", type, value, self.type, self.value
)
value = self.value
self.gettoken()
return value
def gettoken(self) -> None:
tup = next(self.generator)
while tup[0] in (tokenize.COMMENT, tokenize.NL):
tup = next(self.generator)
self.type, self.value, self.begin, self.end, self.line = tup
# print token.tok_name[self.type], repr(self.value)
def raise_error(self, msg: str, *args: Any) -> NoReturn:
if args:
try:
msg = msg % args
except Exception:
msg = " ".join([msg] + list(map(str, args)))
raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line))
class NFAState:
arcs: list[tuple[Optional[str], "NFAState"]]
def __init__(self) -> None:
self.arcs = [] # list of (label, NFAState) pairs
def addarc(self, next: "NFAState", label: Optional[str] = None) -> None:
assert label is None or isinstance(label, str)
assert isinstance(next, NFAState)
self.arcs.append((label, next))
class DFAState:
nfaset: dict[NFAState, Any]
isfinal: bool
arcs: dict[str, "DFAState"]
def __init__(self, nfaset: dict[NFAState, Any], final: NFAState) -> None:
assert isinstance(nfaset, dict)
assert isinstance(next(iter(nfaset)), NFAState)
assert isinstance(final, NFAState)
self.nfaset = nfaset
self.isfinal = final in nfaset
self.arcs = {} # map from label to DFAState
def addarc(self, next: "DFAState", label: str) -> None:
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(next, DFAState)
self.arcs[label] = next
def unifystate(self, old: "DFAState", new: "DFAState") -> None:
for label, next in self.arcs.items():
if next is old:
self.arcs[label] = new
def __eq__(self, other: Any) -> bool:
# Equality test -- ignore the nfaset instance variable
assert isinstance(other, DFAState)
if self.isfinal != other.isfinal:
return False
# Can't just return self.arcs == other.arcs, because that
# would invoke this method recursively, with cycles...
if len(self.arcs) != len(other.arcs):
return False
for label, next in self.arcs.items():
if next is not other.arcs.get(label):
return False
return True
__hash__: Any = None # For Py3 compatibility.
def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar:
p = ParserGenerator(filename)
return p.make_grammar()
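A minimal end-to-end sketch of the generator. The two-rule toy grammar below is hypothetical and exists only for illustration; it assumes blib2to3 is importable and that the temporary file can be reopened by name:
import os
import tempfile

from blib2to3.pgen2.pgen import generate_grammar

toy = "start: item NEWLINE ENDMARKER\nitem: NAME ('+' NAME)*\n"
with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
    f.write(toy)
g = generate_grammar(f.name)
os.unlink(f.name)
print(g.symbol2number)  # e.g. {'start': 256, 'item': 257}
print(g.start)          # the number assigned to the first rule, 'start'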

View File

@@ -0,0 +1,92 @@
"""Token constants (from "token.h")."""
from typing import Final
# Taken from Python (r53757) and modified to include some tokens
# originally monkeypatched in by pgen2.tokenize
# --start constants--
ENDMARKER: Final = 0
NAME: Final = 1
NUMBER: Final = 2
STRING: Final = 3
NEWLINE: Final = 4
INDENT: Final = 5
DEDENT: Final = 6
LPAR: Final = 7
RPAR: Final = 8
LSQB: Final = 9
RSQB: Final = 10
COLON: Final = 11
COMMA: Final = 12
SEMI: Final = 13
PLUS: Final = 14
MINUS: Final = 15
STAR: Final = 16
SLASH: Final = 17
VBAR: Final = 18
AMPER: Final = 19
LESS: Final = 20
GREATER: Final = 21
EQUAL: Final = 22
DOT: Final = 23
PERCENT: Final = 24
BACKQUOTE: Final = 25
LBRACE: Final = 26
RBRACE: Final = 27
EQEQUAL: Final = 28
NOTEQUAL: Final = 29
LESSEQUAL: Final = 30
GREATEREQUAL: Final = 31
TILDE: Final = 32
CIRCUMFLEX: Final = 33
LEFTSHIFT: Final = 34
RIGHTSHIFT: Final = 35
DOUBLESTAR: Final = 36
PLUSEQUAL: Final = 37
MINEQUAL: Final = 38
STAREQUAL: Final = 39
SLASHEQUAL: Final = 40
PERCENTEQUAL: Final = 41
AMPEREQUAL: Final = 42
VBAREQUAL: Final = 43
CIRCUMFLEXEQUAL: Final = 44
LEFTSHIFTEQUAL: Final = 45
RIGHTSHIFTEQUAL: Final = 46
DOUBLESTAREQUAL: Final = 47
DOUBLESLASH: Final = 48
DOUBLESLASHEQUAL: Final = 49
AT: Final = 50
ATEQUAL: Final = 51
OP: Final = 52
COMMENT: Final = 53
NL: Final = 54
RARROW: Final = 55
AWAIT: Final = 56
ASYNC: Final = 57
ERRORTOKEN: Final = 58
COLONEQUAL: Final = 59
FSTRING_START: Final = 60
FSTRING_MIDDLE: Final = 61
FSTRING_END: Final = 62
BANG: Final = 63
N_TOKENS: Final = 64
NT_OFFSET: Final = 256
# --end constants--
tok_name: Final[dict[int, str]] = {}
for _name, _value in list(globals().items()):
if type(_value) is int:
tok_name[_value] = _name
def ISTERMINAL(x: int) -> bool:
return x < NT_OFFSET
def ISNONTERMINAL(x: int) -> bool:
return x >= NT_OFFSET
def ISEOF(x: int) -> bool:
return x == ENDMARKER
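A small sanity-check sketch of the tables above, assuming the module is importable as blib2to3.pgen2.token: names round-trip through tok_name, and NT_OFFSET separates terminal tokens from grammar symbols:
from blib2to3.pgen2 import token

print(token.tok_name[token.NAME], token.tok_name[token.FSTRING_START])  # NAME FSTRING_START
print(token.ISTERMINAL(token.OP))    # True: 52 < NT_OFFSET
print(token.ISNONTERMINAL(257))      # True: symbol numbers start at 256
print(token.ISEOF(token.ENDMARKER))  # True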

File diff suppressed because it is too large

View File

@@ -0,0 +1,204 @@
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Export the Python grammar and symbols."""
# Python imports
import os
from typing import Union
# Local imports
from .pgen2 import driver
from .pgen2.grammar import Grammar
# Moved into initialize because mypyc can't handle __file__ (XXX bug)
# # The grammar file
# _GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
# _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
# "PatternGrammar.txt")
class Symbols:
def __init__(self, grammar: Grammar) -> None:
"""Initializer.
Creates an attribute for each grammar symbol (nonterminal),
whose value is the symbol's type (an int >= 256).
"""
for name, symbol in grammar.symbol2number.items():
setattr(self, name, symbol)
class _python_symbols(Symbols):
and_expr: int
and_test: int
annassign: int
arglist: int
argument: int
arith_expr: int
asexpr_test: int
assert_stmt: int
async_funcdef: int
async_stmt: int
atom: int
augassign: int
break_stmt: int
case_block: int
classdef: int
comp_for: int
comp_if: int
comp_iter: int
comp_op: int
comparison: int
compound_stmt: int
continue_stmt: int
decorated: int
decorator: int
decorators: int
del_stmt: int
dictsetmaker: int
dotted_as_name: int
dotted_as_names: int
dotted_name: int
encoding_decl: int
eval_input: int
except_clause: int
expr: int
expr_stmt: int
exprlist: int
factor: int
file_input: int
flow_stmt: int
for_stmt: int
fstring: int
fstring_format_spec: int
fstring_middle: int
fstring_replacement_field: int
funcdef: int
global_stmt: int
guard: int
if_stmt: int
import_as_name: int
import_as_names: int
import_from: int
import_name: int
import_stmt: int
lambdef: int
listmaker: int
match_stmt: int
namedexpr_test: int
not_test: int
old_comp_for: int
old_comp_if: int
old_comp_iter: int
old_lambdef: int
old_test: int
or_test: int
parameters: int
paramspec: int
pass_stmt: int
pattern: int
patterns: int
power: int
raise_stmt: int
return_stmt: int
shift_expr: int
simple_stmt: int
single_input: int
sliceop: int
small_stmt: int
subject_expr: int
star_expr: int
stmt: int
subscript: int
subscriptlist: int
suite: int
term: int
test: int
testlist: int
testlist1: int
testlist_gexp: int
testlist_safe: int
testlist_star_expr: int
tfpdef: int
tfplist: int
tname: int
tname_star: int
trailer: int
try_stmt: int
type_stmt: int
typedargslist: int
typeparam: int
typeparams: int
typevar: int
typevartuple: int
varargslist: int
vfpdef: int
vfplist: int
vname: int
while_stmt: int
with_stmt: int
xor_expr: int
yield_arg: int
yield_expr: int
yield_stmt: int
class _pattern_symbols(Symbols):
Alternative: int
Alternatives: int
Details: int
Matcher: int
NegatedUnit: int
Repeater: int
Unit: int
python_grammar: Grammar
python_grammar_async_keywords: Grammar
python_grammar_soft_keywords: Grammar
pattern_grammar: Grammar
python_symbols: _python_symbols
pattern_symbols: _pattern_symbols
def initialize(cache_dir: Union[str, "os.PathLike[str]", None] = None) -> None:
global python_grammar
global python_grammar_async_keywords
global python_grammar_soft_keywords
global python_symbols
global pattern_grammar
global pattern_symbols
# The grammar file
_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt")
_PATTERN_GRAMMAR_FILE = os.path.join(
os.path.dirname(__file__), "PatternGrammar.txt"
)
python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir)
assert "print" not in python_grammar.keywords
assert "exec" not in python_grammar.keywords
soft_keywords = python_grammar.soft_keywords.copy()
python_grammar.soft_keywords.clear()
python_symbols = _python_symbols(python_grammar)
# Python 3.0-3.6
python_grammar.version = (3, 0)
# Python 3.7+
python_grammar_async_keywords = python_grammar.copy()
python_grammar_async_keywords.async_keywords = True
python_grammar_async_keywords.version = (3, 7)
# Python 3.10+
python_grammar_soft_keywords = python_grammar_async_keywords.copy()
python_grammar_soft_keywords.soft_keywords = soft_keywords
python_grammar_soft_keywords.version = (3, 10)
pattern_grammar = driver.load_packaged_grammar(
"blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir
)
pattern_symbols = _pattern_symbols(pattern_grammar)
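A minimal usage sketch, assuming the package is importable as blib2to3 (it ships inside Black). initialize() must run before the module-level globals are populated:
from blib2to3 import pygram

pygram.initialize(cache_dir=None)
print(pygram.python_symbols.funcdef >= 256)                          # True
print(pygram.python_grammar.version)                                 # (3, 0)
print(pygram.python_grammar_soft_keywords.version)                   # (3, 10)
print("match" in pygram.python_grammar_soft_keywords.soft_keywords)  # True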

View File

@@ -0,0 +1,975 @@
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""
Python parse tree definitions.
This is a very concrete parse tree; we need to keep every token and
even the comments and whitespace between tokens.
There's also a pattern matching implementation here.
"""
# mypy: allow-untyped-defs, allow-incomplete-defs
from typing import Any, Iterable, Iterator, Optional, TypeVar, Union
from blib2to3.pgen2.grammar import Grammar
__author__ = "Guido van Rossum <guido@python.org>"
import sys
from io import StringIO
HUGE: int = 0x7FFFFFFF # maximum repeat count, default max
_type_reprs: dict[int, Union[str, int]] = {}
def type_repr(type_num: int) -> Union[str, int]:
global _type_reprs
if not _type_reprs:
from . import pygram
if not hasattr(pygram, "python_symbols"):
pygram.initialize(cache_dir=None)
# printing tokens is possible but not as useful
# from .pgen2 import token // token.__dict__.items():
for name in dir(pygram.python_symbols):
val = getattr(pygram.python_symbols, name)
if type(val) == int:
_type_reprs[val] = name
return _type_reprs.setdefault(type_num, type_num)
_P = TypeVar("_P", bound="Base")
NL = Union["Node", "Leaf"]
Context = tuple[str, tuple[int, int]]
RawNode = tuple[int, Optional[str], Optional[Context], Optional[list[NL]]]
class Base:
"""
Abstract base class for Node and Leaf.
This provides some default functionality and boilerplate using the
template pattern.
A node may be a subnode of at most one parent.
"""
# Default values for instance variables
type: int # int: token number (< 256) or symbol number (>= 256)
parent: Optional["Node"] = None # Parent node pointer, or None
children: list[NL] # List of subnodes
was_changed: bool = False
was_checked: bool = False
def __new__(cls, *args, **kwds):
"""Constructor that prevents Base from being instantiated."""
assert cls is not Base, "Cannot instantiate Base"
return object.__new__(cls)
def __eq__(self, other: Any) -> bool:
"""
Compare two nodes for equality.
This calls the method _eq().
"""
if self.__class__ is not other.__class__:
return NotImplemented
return self._eq(other)
@property
def prefix(self) -> str:
raise NotImplementedError
def _eq(self: _P, other: _P) -> bool:
"""
Compare two nodes for equality.
This is called by __eq__ and __ne__. It is only called if the two nodes
have the same type. This must be implemented by the concrete subclass.
Nodes should be considered equal if they have the same structure,
ignoring the prefix string and other context information.
"""
raise NotImplementedError
def __deepcopy__(self: _P, memo: Any) -> _P:
return self.clone()
def clone(self: _P) -> _P:
"""
Return a cloned (deep) copy of self.
This must be implemented by the concrete subclass.
"""
raise NotImplementedError
def post_order(self) -> Iterator[NL]:
"""
Return a post-order iterator for the tree.
This must be implemented by the concrete subclass.
"""
raise NotImplementedError
def pre_order(self) -> Iterator[NL]:
"""
Return a pre-order iterator for the tree.
This must be implemented by the concrete subclass.
"""
raise NotImplementedError
def replace(self, new: Union[NL, list[NL]]) -> None:
"""Replace this node with a new one in the parent."""
assert self.parent is not None, str(self)
assert new is not None
if not isinstance(new, list):
new = [new]
l_children = []
found = False
for ch in self.parent.children:
if ch is self:
assert not found, (self.parent.children, self, new)
if new is not None:
l_children.extend(new)
found = True
else:
l_children.append(ch)
assert found, (self.children, self, new)
self.parent.children = l_children
self.parent.changed()
self.parent.invalidate_sibling_maps()
for x in new:
x.parent = self.parent
self.parent = None
def get_lineno(self) -> Optional[int]:
"""Return the line number which generated the invocant node."""
node = self
while not isinstance(node, Leaf):
if not node.children:
return None
node = node.children[0]
return node.lineno
def changed(self) -> None:
if self.was_changed:
return
if self.parent:
self.parent.changed()
self.was_changed = True
def remove(self) -> Optional[int]:
"""
Remove the node from the tree. Returns the position of the node in its
parent's children before it was removed.
"""
if self.parent:
for i, node in enumerate(self.parent.children):
if node is self:
del self.parent.children[i]
self.parent.changed()
self.parent.invalidate_sibling_maps()
self.parent = None
return i
return None
@property
def next_sibling(self) -> Optional[NL]:
"""
The node immediately following the invocant in their parent's children
list. If the invocant does not have a next sibling, it is None.
"""
if self.parent is None:
return None
if self.parent.next_sibling_map is None:
self.parent.update_sibling_maps()
assert self.parent.next_sibling_map is not None
return self.parent.next_sibling_map[id(self)]
@property
def prev_sibling(self) -> Optional[NL]:
"""
The node immediately preceding the invocant in their parent's children
list. If the invocant does not have a previous sibling, it is None.
"""
if self.parent is None:
return None
if self.parent.prev_sibling_map is None:
self.parent.update_sibling_maps()
assert self.parent.prev_sibling_map is not None
return self.parent.prev_sibling_map[id(self)]
def leaves(self) -> Iterator["Leaf"]:
for child in self.children:
yield from child.leaves()
def depth(self) -> int:
if self.parent is None:
return 0
return 1 + self.parent.depth()
def get_suffix(self) -> str:
"""
Return the string immediately following the invocant node. This is
effectively equivalent to node.next_sibling.prefix
"""
next_sib = self.next_sibling
if next_sib is None:
return ""
prefix = next_sib.prefix
return prefix
class Node(Base):
"""Concrete implementation for interior nodes."""
fixers_applied: Optional[list[Any]]
used_names: Optional[set[str]]
def __init__(
self,
type: int,
children: list[NL],
context: Optional[Any] = None,
prefix: Optional[str] = None,
fixers_applied: Optional[list[Any]] = None,
) -> None:
"""
Initializer.
Takes a type constant (a symbol number >= 256), a sequence of
child nodes, and an optional context keyword argument.
As a side effect, the parent pointers of the children are updated.
"""
assert type >= 256, type
self.type = type
self.children = list(children)
for ch in self.children:
assert ch.parent is None, repr(ch)
ch.parent = self
self.invalidate_sibling_maps()
if prefix is not None:
self.prefix = prefix
if fixers_applied:
self.fixers_applied = fixers_applied[:]
else:
self.fixers_applied = None
def __repr__(self) -> str:
"""Return a canonical string representation."""
assert self.type is not None
return "{}({}, {!r})".format(
self.__class__.__name__,
type_repr(self.type),
self.children,
)
def __str__(self) -> str:
"""
Return a pretty string representation.
This reproduces the input source exactly.
"""
return "".join(map(str, self.children))
def _eq(self, other: Base) -> bool:
"""Compare two nodes for equality."""
return (self.type, self.children) == (other.type, other.children)
def clone(self) -> "Node":
"""Return a cloned (deep) copy of self."""
assert self.type is not None
return Node(
self.type,
[ch.clone() for ch in self.children],
fixers_applied=self.fixers_applied,
)
def post_order(self) -> Iterator[NL]:
"""Return a post-order iterator for the tree."""
for child in self.children:
yield from child.post_order()
yield self
def pre_order(self) -> Iterator[NL]:
"""Return a pre-order iterator for the tree."""
yield self
for child in self.children:
yield from child.pre_order()
@property
def prefix(self) -> str:
"""
The whitespace and comments preceding this node in the input.
"""
if not self.children:
return ""
return self.children[0].prefix
@prefix.setter
def prefix(self, prefix: str) -> None:
if self.children:
self.children[0].prefix = prefix
def set_child(self, i: int, child: NL) -> None:
"""
Equivalent to 'node.children[i] = child'. This method also sets the
child's parent attribute appropriately.
"""
child.parent = self
self.children[i].parent = None
self.children[i] = child
self.changed()
self.invalidate_sibling_maps()
def insert_child(self, i: int, child: NL) -> None:
"""
Equivalent to 'node.children.insert(i, child)'. This method also sets
the child's parent attribute appropriately.
"""
child.parent = self
self.children.insert(i, child)
self.changed()
self.invalidate_sibling_maps()
def append_child(self, child: NL) -> None:
"""
Equivalent to 'node.children.append(child)'. This method also sets the
child's parent attribute appropriately.
"""
child.parent = self
self.children.append(child)
self.changed()
self.invalidate_sibling_maps()
def invalidate_sibling_maps(self) -> None:
self.prev_sibling_map: Optional[dict[int, Optional[NL]]] = None
self.next_sibling_map: Optional[dict[int, Optional[NL]]] = None
def update_sibling_maps(self) -> None:
_prev: dict[int, Optional[NL]] = {}
_next: dict[int, Optional[NL]] = {}
self.prev_sibling_map = _prev
self.next_sibling_map = _next
previous: Optional[NL] = None
for current in self.children:
_prev[id(current)] = previous
_next[id(previous)] = current
previous = current
_next[id(current)] = None
class Leaf(Base):
"""Concrete implementation for leaf nodes."""
# Default values for instance variables
value: str
fixers_applied: list[Any]
bracket_depth: int
# Changed later in brackets.py
opening_bracket: Optional["Leaf"] = None
used_names: Optional[set[str]]
_prefix = "" # Whitespace and comments preceding this token in the input
lineno: int = 0 # Line where this token starts in the input
column: int = 0 # Column where this token starts in the input
# If not None, this Leaf is created by converting a block of fmt off/skip
# code, and `fmt_pass_converted_first_leaf` points to the first Leaf in the
# converted code.
fmt_pass_converted_first_leaf: Optional["Leaf"] = None
def __init__(
self,
type: int,
value: str,
context: Optional[Context] = None,
prefix: Optional[str] = None,
fixers_applied: list[Any] = [],
opening_bracket: Optional["Leaf"] = None,
fmt_pass_converted_first_leaf: Optional["Leaf"] = None,
) -> None:
"""
Initializer.
Takes a type constant (a token number < 256), a string value, and an
optional context keyword argument.
"""
assert 0 <= type < 256, type
if context is not None:
self._prefix, (self.lineno, self.column) = context
self.type = type
self.value = value
if prefix is not None:
self._prefix = prefix
self.fixers_applied: Optional[list[Any]] = fixers_applied[:]
self.children = []
self.opening_bracket = opening_bracket
self.fmt_pass_converted_first_leaf = fmt_pass_converted_first_leaf
def __repr__(self) -> str:
"""Return a canonical string representation."""
from .pgen2.token import tok_name
assert self.type is not None
return "{}({}, {!r})".format(
self.__class__.__name__,
tok_name.get(self.type, self.type),
self.value,
)
def __str__(self) -> str:
"""
Return a pretty string representation.
This reproduces the input source exactly.
"""
return self._prefix + str(self.value)
def _eq(self, other: "Leaf") -> bool:
"""Compare two nodes for equality."""
return (self.type, self.value) == (other.type, other.value)
def clone(self) -> "Leaf":
"""Return a cloned (deep) copy of self."""
assert self.type is not None
return Leaf(
self.type,
self.value,
(self.prefix, (self.lineno, self.column)),
fixers_applied=self.fixers_applied,
)
def leaves(self) -> Iterator["Leaf"]:
yield self
def post_order(self) -> Iterator["Leaf"]:
"""Return a post-order iterator for the tree."""
yield self
def pre_order(self) -> Iterator["Leaf"]:
"""Return a pre-order iterator for the tree."""
yield self
@property
def prefix(self) -> str:
"""
The whitespace and comments preceding this token in the input.
"""
return self._prefix
@prefix.setter
def prefix(self, prefix: str) -> None:
self.changed()
self._prefix = prefix
def convert(gr: Grammar, raw_node: RawNode) -> NL:
"""
Convert raw node information to a Node or Leaf instance.
This is passed to the parser driver which calls it whenever a reduction of a
grammar rule produces a new complete node, so that the tree is built
strictly bottom-up.
"""
type, value, context, children = raw_node
if children or type in gr.number2symbol:
# If there's exactly one child, return that child instead of
# creating a new node.
assert children is not None
if len(children) == 1:
return children[0]
return Node(type, children, context=context)
else:
return Leaf(type, value or "", context=context)
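A small sketch of the single-child collapse performed by convert(); an empty Grammar and the hypothetical symbol number 300 are enough for illustration:
from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import Leaf, Node, convert

gr = Grammar()        # empty grammar, sufficient for this sketch
leaf = Leaf(1, "x")   # 1 == token.NAME
collapsed = convert(gr, (300, None, None, [leaf]))
assert collapsed is leaf  # a symbol wrapping exactly one child is dropped
pair = convert(gr, (300, None, None, [Leaf(1, "a"), Leaf(1, "b")]))
assert isinstance(pair, Node) and str(pair) == "ab"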
_Results = dict[str, NL]
class BasePattern:
"""
A pattern is a tree matching pattern.
It looks for a specific node type (token or symbol), and
optionally for a specific content.
This is an abstract base class. There are three concrete
subclasses:
- LeafPattern matches a single leaf node;
- NodePattern matches a single node (usually non-leaf);
- WildcardPattern matches a sequence of nodes of variable length.
"""
# Defaults for instance variables
type: Optional[int]
type = None # Node type (token if < 256, symbol if >= 256)
content: Any = None # Optional content matching pattern
name: Optional[str] = None # Optional name used to store match in results dict
def __new__(cls, *args, **kwds):
"""Constructor that prevents BasePattern from being instantiated."""
assert cls is not BasePattern, "Cannot instantiate BasePattern"
return object.__new__(cls)
def __repr__(self) -> str:
assert self.type is not None
args = [type_repr(self.type), self.content, self.name]
while args and args[-1] is None:
del args[-1]
return "{}({})".format(self.__class__.__name__, ", ".join(map(repr, args)))
def _submatch(self, node, results=None) -> bool:
raise NotImplementedError
def optimize(self) -> "BasePattern":
"""
A subclass can define this as a hook for optimizations.
Returns either self or another node with the same effect.
"""
return self
def match(self, node: NL, results: Optional[_Results] = None) -> bool:
"""
Does this pattern exactly match a node?
Returns True if it matches, False if not.
If results is not None, it must be a dict which will be
updated with the nodes matching named subpatterns.
Default implementation for non-wildcard patterns.
"""
if self.type is not None and node.type != self.type:
return False
if self.content is not None:
r: Optional[_Results] = None
if results is not None:
r = {}
if not self._submatch(node, r):
return False
if r:
assert results is not None
results.update(r)
if results is not None and self.name:
results[self.name] = node
return True
def match_seq(self, nodes: list[NL], results: Optional[_Results] = None) -> bool:
"""
Does this pattern exactly match a sequence of nodes?
Default implementation for non-wildcard patterns.
"""
if len(nodes) != 1:
return False
return self.match(nodes[0], results)
def generate_matches(self, nodes: list[NL]) -> Iterator[tuple[int, _Results]]:
"""
Generator yielding all matches for this pattern.
Default implementation for non-wildcard patterns.
"""
r: _Results = {}
if nodes and self.match(nodes[0], r):
yield 1, r
class LeafPattern(BasePattern):
def __init__(
self,
type: Optional[int] = None,
content: Optional[str] = None,
name: Optional[str] = None,
) -> None:
"""
Initializer. Takes optional type, content, and name.
The type, if given, must be a token type (< 256). If not given,
this matches any *leaf* node; the content may still be required.
The content, if given, must be a string.
If a name is given, the matching node is stored in the results
dict under that key.
"""
if type is not None:
assert 0 <= type < 256, type
if content is not None:
assert isinstance(content, str), repr(content)
self.type = type
self.content = content
self.name = name
def match(self, node: NL, results=None) -> bool:
"""Override match() to insist on a leaf node."""
if not isinstance(node, Leaf):
return False
return BasePattern.match(self, node, results)
def _submatch(self, node, results=None):
"""
Match the pattern's content to the node's children.
This assumes the node type matches and self.content is not None.
Returns True if it matches, False if not.
If results is not None, it must be a dict which will be
updated with the nodes matching named subpatterns.
When returning False, the results dict may still be updated.
"""
return self.content == node.value
class NodePattern(BasePattern):
wildcards: bool = False
def __init__(
self,
type: Optional[int] = None,
content: Optional[Iterable[str]] = None,
name: Optional[str] = None,
) -> None:
"""
Initializer. Takes optional type, content, and name.
The type, if given, must be a symbol type (>= 256). If the
type is None this matches *any* single node (leaf or not),
except if content is not None, in which case it only matches
non-leaf nodes that also match the content pattern.
The content, if not None, must be a sequence of Patterns that
must match the node's children exactly. If the content is
given, the type must not be None.
If a name is given, the matching node is stored in the results
dict under that key.
"""
if type is not None:
assert type >= 256, type
if content is not None:
assert not isinstance(content, str), repr(content)
newcontent = list(content)
for i, item in enumerate(newcontent):
assert isinstance(item, BasePattern), (i, item)
# I don't even think this code is used anywhere, but it does cause
# unreachable errors from mypy. This function's signature does look
# odd though *shrug*.
if isinstance(item, WildcardPattern): # type: ignore[unreachable]
self.wildcards = True # type: ignore[unreachable]
self.type = type
self.content = newcontent # TODO: this is unbound when content is None
self.name = name
def _submatch(self, node, results=None) -> bool:
"""
Match the pattern's content to the node's children.
This assumes the node type matches and self.content is not None.
Returns True if it matches, False if not.
If results is not None, it must be a dict which will be
updated with the nodes matching named subpatterns.
When returning False, the results dict may still be updated.
"""
if self.wildcards:
for c, r in generate_matches(self.content, node.children):
if c == len(node.children):
if results is not None:
results.update(r)
return True
return False
if len(self.content) != len(node.children):
return False
for subpattern, child in zip(self.content, node.children):
if not subpattern.match(child, results):
return False
return True
class WildcardPattern(BasePattern):
"""
A wildcard pattern can match zero or more nodes.
This has all the flexibility needed to implement patterns like:
.* .+ .? .{m,n}
(a b c | d e | f)
(...)* (...)+ (...)? (...){m,n}
except it always uses non-greedy matching.
"""
min: int
max: int
def __init__(
self,
content: Optional[str] = None,
min: int = 0,
max: int = HUGE,
name: Optional[str] = None,
) -> None:
"""
Initializer.
Args:
content: optional sequence of subsequences of patterns;
if absent, matches one node;
if present, each subsequence is an alternative [*]
min: optional minimum number of times to match, default 0
max: optional maximum number of times to match, default HUGE
name: optional name assigned to this match
[*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
equivalent to (a b c | d e | f g h); if content is None,
this is equivalent to '.' in regular expression terms.
The min and max parameters work as follows:
min=0, max=maxint: .*
min=1, max=maxint: .+
min=0, max=1: .?
min=1, max=1: .
If content is not None, replace the dot with the parenthesized
list of alternatives, e.g. (a b c | d e | f g h)*
"""
assert 0 <= min <= max <= HUGE, (min, max)
if content is not None:
f = lambda s: tuple(s)
wrapped_content = tuple(map(f, content)) # Protect against alterations
# Check sanity of alternatives
assert len(wrapped_content), repr(
wrapped_content
) # Can't have zero alternatives
for alt in wrapped_content:
assert len(alt), repr(alt)  # Can't have empty alternatives
self.content = wrapped_content
self.min = min
self.max = max
self.name = name
def optimize(self) -> Any:
"""Optimize certain stacked wildcard patterns."""
subpattern = None
if (
self.content is not None
and len(self.content) == 1
and len(self.content[0]) == 1
):
subpattern = self.content[0][0]
if self.min == 1 and self.max == 1:
if self.content is None:
return NodePattern(name=self.name)
if subpattern is not None and self.name == subpattern.name:
return subpattern.optimize()
if (
self.min <= 1
and isinstance(subpattern, WildcardPattern)
and subpattern.min <= 1
and self.name == subpattern.name
):
return WildcardPattern(
subpattern.content,
self.min * subpattern.min,
self.max * subpattern.max,
subpattern.name,
)
return self
def match(self, node, results=None) -> bool:
"""Does this pattern exactly match a node?"""
return self.match_seq([node], results)
def match_seq(self, nodes, results=None) -> bool:
"""Does this pattern exactly match a sequence of nodes?"""
for c, r in self.generate_matches(nodes):
if c == len(nodes):
if results is not None:
results.update(r)
if self.name:
results[self.name] = list(nodes)
return True
return False
def generate_matches(self, nodes) -> Iterator[tuple[int, _Results]]:
"""
Generator yielding matches for a sequence of nodes.
Args:
nodes: sequence of nodes
Yields:
(count, results) tuples where:
count: the match comprises nodes[:count];
results: dict containing named submatches.
"""
if self.content is None:
# Shortcut for special case (see __init__.__doc__)
for count in range(self.min, 1 + min(len(nodes), self.max)):
r = {}
if self.name:
r[self.name] = nodes[:count]
yield count, r
elif self.name == "bare_name":
yield self._bare_name_matches(nodes)
else:
# Temporarily silence stderr while the recursive matcher runs: hitting
# the recursion limit usually results in some ugly messages about how
# RuntimeErrors are being ignored. We only have to do this on CPython,
# though, because other implementations don't have this nasty bug in
# the first place.
if hasattr(sys, "getrefcount"):
save_stderr = sys.stderr
sys.stderr = StringIO()
try:
for count, r in self._recursive_matches(nodes, 0):
if self.name:
r[self.name] = nodes[:count]
yield count, r
except RuntimeError:
# We fall back to the iterative pattern matching scheme if the recursive
# scheme hits the recursion limit.
for count, r in self._iterative_matches(nodes):
if self.name:
r[self.name] = nodes[:count]
yield count, r
finally:
if hasattr(sys, "getrefcount"):
sys.stderr = save_stderr
def _iterative_matches(self, nodes) -> Iterator[tuple[int, _Results]]:
"""Helper to iteratively yield the matches."""
nodelen = len(nodes)
if 0 >= self.min:
yield 0, {}
results = []
# generate matches that use just one alt from self.content
for alt in self.content:
for c, r in generate_matches(alt, nodes):
yield c, r
results.append((c, r))
# for each match, iterate down the nodes
while results:
new_results = []
for c0, r0 in results:
# stop if the entire set of nodes has been matched
if c0 < nodelen and c0 <= self.max:
for alt in self.content:
for c1, r1 in generate_matches(alt, nodes[c0:]):
if c1 > 0:
r = {}
r.update(r0)
r.update(r1)
yield c0 + c1, r
new_results.append((c0 + c1, r))
results = new_results
def _bare_name_matches(self, nodes) -> tuple[int, _Results]:
"""Special optimized matcher for bare_name."""
count = 0
r: _Results = {}
done = False
max = len(nodes)
while not done and count < max:
done = True
for leaf in self.content:
if leaf[0].match(nodes[count], r):
count += 1
done = False
break
assert self.name is not None
r[self.name] = nodes[:count]
return count, r
def _recursive_matches(self, nodes, count) -> Iterator[tuple[int, _Results]]:
"""Helper to recursively yield the matches."""
assert self.content is not None
if count >= self.min:
yield 0, {}
if count < self.max:
for alt in self.content:
for c0, r0 in generate_matches(alt, nodes):
for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
r = {}
r.update(r0)
r.update(r1)
yield c0 + c1, r
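A brief sketch of the min/max semantics described in the docstring above, using bare Leaf nodes (token type 1 is NAME); with content=None, min=1, max=2 the pattern behaves like the regex '.{1,2}' over nodes:
from blib2to3.pytree import Leaf, WildcardPattern

leaves = [Leaf(1, "a"), Leaf(1, "b"), Leaf(1, "c")]
wp = WildcardPattern(min=1, max=2, name="grabbed")
print([count for count, _ in wp.generate_matches(leaves)])  # [1, 2]
results: dict = {}
print(wp.match_seq(leaves[:2], results))           # True
print([str(leaf) for leaf in results["grabbed"]])  # ['a', 'b']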
class NegatedPattern(BasePattern):
def __init__(self, content: Optional[BasePattern] = None) -> None:
"""
Initializer.
The argument is either a pattern or None. If it is None, this
only matches an empty sequence (effectively '$' in regex
lingo). If it is not None, this matches whenever the argument
pattern doesn't have any matches.
"""
if content is not None:
assert isinstance(content, BasePattern), repr(content)
self.content = content
def match(self, node, results=None) -> bool:
# We never match a node in its entirety
return False
def match_seq(self, nodes, results=None) -> bool:
# We only match an empty sequence of nodes in its entirety
return len(nodes) == 0
def generate_matches(self, nodes: list[NL]) -> Iterator[tuple[int, _Results]]:
if self.content is None:
# Return a match if there is an empty sequence
if len(nodes) == 0:
yield 0, {}
else:
# Return a match if the argument pattern has no matches
for c, r in self.content.generate_matches(nodes):
return
yield 0, {}
def generate_matches(
patterns: list[BasePattern], nodes: list[NL]
) -> Iterator[tuple[int, _Results]]:
"""
Generator yielding matches for a sequence of patterns and nodes.
Args:
patterns: a sequence of patterns
nodes: a sequence of nodes
Yields:
(count, results) tuples where:
count: the entire sequence of patterns matches nodes[:count];
results: dict containing named submatches.
"""
if not patterns:
yield 0, {}
else:
p, rest = patterns[0], patterns[1:]
for c0, r0 in p.generate_matches(nodes):
if not rest:
yield c0, r0
else:
for c1, r1 in generate_matches(rest, nodes[c0:]):
r = {}
r.update(r0)
r.update(r1)
yield c0 + c1, r
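Finally, a short sketch of the module-level generate_matches() over a pattern sequence, pairing a LeafPattern with a trailing wildcard; the token numbers 1, 22 and 2 are NAME, EQUAL and NUMBER from pgen2.token:
from blib2to3.pytree import Leaf, LeafPattern, WildcardPattern, generate_matches

nodes = [Leaf(1, "foo"), Leaf(22, "="), Leaf(2, "1")]
patterns = [LeafPattern(1, name="target"), WildcardPattern(name="rest")]
for count, results in generate_matches(patterns, nodes):
    # count grows as the wildcard absorbs more trailing nodes: 1, 2, 3
    print(count, str(results["target"]), [str(n) for n in results["rest"]])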