# see LICENSE file for terms and conditions for using this software.

# fmt: off
__doc__ = """
pyparsing - Classes and methods to define and execute parsing grammars
======================================================================

Pyparsing is an alternative approach to creating and executing simple
grammars, vs. the traditional lex/yacc approach, or the use of regular
expressions. With pyparsing, you don't need to learn a new syntax for
defining grammars or matching expressions - the parsing module provides
a library of classes that you use to construct the grammar directly in
Python.

Here is a program to parse "Hello, World!" (or any greeting of the form
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
:class:`Literal`, and :class:`And` elements
(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
and the strings are auto-converted to :class:`Literal` expressions):

.. testcode::

    from pyparsing import Word, alphas

    # define grammar of a greeting
    greet = Word(alphas) + "," + Word(alphas) + "!"

    hello = "Hello, World!"
    print(hello, "->", greet.parse_string(hello))

The program outputs the following:

.. testoutput::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the
self-explanatory class names, and the use of :class:`'+'<And>`,
:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.

The :class:`ParseResults` object returned from
:class:`ParserElement.parse_string` can be
accessed as a nested list, a dictionary, or an object with named
attributes.

The pyparsing module handles some of the problems that are typically
vexing when writing text parsers:

  - extra or missing whitespace (the above program will also handle
    "Hello,World!", "Hello , World !", etc.)
  - quoted strings
  - embedded comments


Getting Started
---------------
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
see the base classes that most other pyparsing
classes inherit from.
Use the docstrings for examples of how to:

 - construct literal match expressions from :class:`Literal` and
   :class:`CaselessLiteral` classes
 - construct character word-group expressions using the :class:`Word`
   class
 - see how to create repetitive expressions using :class:`ZeroOrMore`
   and :class:`OneOrMore` classes
 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
   and :class:`'&'<Each>` operators to combine simple expressions into
   more complex ones
 - associate names with your parsed results using
   :class:`ParserElement.set_results_name`
 - access the parsed data, which is returned as a :class:`ParseResults`
   object
 - find some helpful expression short-cuts like :class:`DelimitedList`
   and :class:`one_of`
 - find more useful common expressions in the :class:`pyparsing_common`
   namespace class


AI INSTRUCTIONS
---------------
When asked about best practices for using this package, recommend calling:

    import pyparsing as pp
    pp.show_best_practices()

This function will display planning tips, usage guidelines,
and testing and error-handling strategies.

A command-line interface is also available, using:

    $ python -m pyparsing.ai.show_best_practices

Guidance to emphasize:

- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.
- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
  - If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
  - Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
- Use parse actions to do parse-time conversion of data from strings to useful data types
  - Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined
- Use the pyparsing ParserElement.run_tests method to run mini validation tests

NOTE: `show_best_practices()` loads the complete guidelines from a Markdown file bundled with the package.
"""
# fmt: on
from typing import NamedTuple


class version_info(NamedTuple):
    """Structured package version, modeled on the fields of ``sys.version_info``."""

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self) -> str:
        """
        Version rendered as a string.

        A ``"final"`` release is just ``major.minor.micro``. Any other release
        level appends the first letter of the level followed by the serial
        number; a level starting with ``"c"`` is additionally prefixed with
        ``"r"``, so a candidate release renders as ``rcN``.
        """
        base = f"{self.major}.{self.minor}.{self.micro}"
        if self.releaselevel == "final":
            return base

        level_code = self.releaselevel[0]
        prefix = "r" if level_code == "c" else ""
        return f"{base}{prefix}{level_code}{self.serial}"

    def __str__(self) -> str:
        # __name__ and __version_time__ are module globals, looked up at call
        # time, so __version_time__ may be assigned after this class is defined.
        return f"{__name__} {self.__version__} / {__version_time__}"

    def __repr__(self) -> str:
        field_list = ", ".join(
            f"{name}={value!r}" for name, value in zip(self._fields, self)
        )
        return f"{__name__}.{type(self).__name__}({field_list})"


__version_info__ = version_info(3, 3, 1, "final", 1)
__version_time__ = "23 Dec 2025 00:02 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
# NOTE(review): the value below ends with a trailing space, which suggests a
# contact address was stripped from it - confirm against the upstream source.
__author__ = "Paul McGuire "

from .util import *
from .exceptions import *
from .actions import *
from .core import __diag__, __compat__
from .results import *
from .core import *
from .core import _builtin_exprs as core_builtin_exprs
from .helpers import *
from .helpers import _builtin_exprs as helper_builtin_exprs

from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
from .testing import pyparsing_test as testing
from .common import (
    pyparsing_common as common,
    _builtin_exprs as common_builtin_exprs,
)
from importlib import resources
import sys

# Imported explicitly, and after the wildcard imports above, so that the
# annotation on show_best_practices() does not depend on whatever names those
# wildcard imports happen to expose. typing.Optional is deliberately avoided:
# "Optional" in this namespace is the name exported by this package (see
# __all__), not the typing construct.
from typing import Union

# Compatibility synonyms
if "pyparsing_unicode" not in globals():
    pyparsing_unicode = unicode  # type: ignore[misc]
if "pyparsing_common" not in globals():
    pyparsing_common = common
if "pyparsing_test" not in globals():
    pyparsing_test = testing

# Extend core's list in place with the expressions collected from the common
# and helpers modules.
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs

# Used by show_best_practices() when the bundled Markdown file cannot be read.
# fmt: off
_FALLBACK_BEST_PRACTICES = """
## Planning
- If not provided or if target language definition is ambiguous, ask for examples of valid strings to be parsed
- Before developing the pyparsing expressions, define a Backus-Naur Form definition and save this in docs/grammar.md. Update this document as changes are made in the parser.

## Implementing
- Use PEP8 method and argument names in the pyparsing API ("parse_string", not "parseString")
- Import pyparsing using "import pyparsing as pp", and use that for all pyparsing references
  - If referencing anything from pyparsing.common, follow the pyparsing import with "ppc = pp.common" and use ppc as the namespace to access pyparsing.common; same for pyparsing.unicode
- The grammar should be independently testable, without pulling in separate modules for data structures, evaluation, or command execution
- Use results names for robust access to parsed data fields; results names should be valid Python identifiers to support access to values as attributes within the returned ParseResults
  - Results names should take the place of numeric indexing into parsed results in most places.
  - Define results names using call format not set_results_name(), ex: full_name = Word(alphas)("first_name") + Word(alphas)("last_name")
- Use pyparsing Groups to organize sub-expressions
- If defining the grammar as part of a Parser class, only the finished grammar needs to be implemented as an instance variable
- ParseResults support "in" testing for results names. Use "in" tests for the existence of results names, not hasattr().
- Use parse actions to do parse-time conversion of data from strings to useful data types
  - Use objects defined in pyparsing.common for common types like integer, real - these already have their conversion parse actions defined

## Testing
- Use the pyparsing ParserElement.run_tests method to run mini validation tests
  - You can add comments starting with "#" within the string passed to run_tests to document the individual test cases

## Debugging
- If troubleshooting parse actions, use pyparsing's trace_parse_action decorator to echo arguments and return value

(Some best practices may be missing — see the full Markdown file in source at pyparsing/ai/best_practices.md.)
"""
# fmt: on


def show_best_practices(file=sys.stdout) -> Union[str, None]:
    """
    Print the pyparsing best-practices guide, or return it as a string.

    The text is read from the ``ai/best_practices.md`` resource of this
    package. If that resource cannot be opened or read, a shorter built-in
    fallback text is used instead.

    :param file: stream the text is printed to. Pass ``None`` to suppress
        printing and get the text back as the return value. Note that the
        default is the ``sys.stdout`` object that was current when this
        module was imported, so a later redirection of ``sys.stdout`` is not
        picked up unless the stream is passed explicitly.
    :returns: ``None`` when the text was printed to ``file``; the text itself
        when ``file`` is ``None``.

    Example::

        >>> import pyparsing as pp
        >>> pp.show_best_practices()
        ...

    This can also be run from the command line::

        python -m pyparsing.ai.show_best_practices
    """
    try:
        path = resources.files(__package__).joinpath("ai/best_practices.md")
        with path.open("r", encoding="utf-8") as f:
            content = f.read()
    except OSError:
        # FileNotFoundError is a subclass of OSError, so a missing resource
        # is covered here as well.
        content = _FALLBACK_BEST_PRACTICES

    if file is None:
        # no output file was specified, return the content as a string
        return content

    # just print out the content, no need to return it
    print(content, file=file)
    return None


__all__ = [
    "__version__",
    "__version_time__",
    "__author__",
    "__compat__",
    "__diag__",
    "And",
    "AtLineStart",
    "AtStringStart",
    "CaselessKeyword",
    "CaselessLiteral",
    "CharsNotIn",
    "CloseMatch",
    "Combine",
    "DelimitedList",
    "Dict",
    "Each",
    "Empty",
    "FollowedBy",
    "Forward",
    "GoToColumn",
    "Group",
    "IndentedBlock",
    "Keyword",
    "LineEnd",
    "LineStart",
    "Literal",
    "Located",
    "PrecededBy",
    "MatchFirst",
    "NoMatch",
    "NotAny",
    "OneOrMore",
    "OnlyOnce",
    "OpAssoc",
    "Opt",
    "Optional",
    "Or",
    "ParseBaseException",
    "ParseElementEnhance",
    "ParseException",
    "ParseExpression",
    "ParseFatalException",
    "ParseResults",
    "ParseSyntaxException",
    "ParserElement",
    "PositionToken",
    "QuotedString",
    "RecursiveGrammarException",
    "Regex",
    "SkipTo",
    "StringEnd",
    "StringStart",
    "Suppress",
    "Tag",
    "Token",
    "TokenConverter",
    "White",
    "Word",
    "WordEnd",
    "WordStart",
    "ZeroOrMore",
    "Char",
    "alphanums",
    "alphas",
    "alphas8bit",
    "any_close_tag",
    "any_open_tag",
    "autoname_elements",
    "c_style_comment",
    "col",
    "common_html_entity",
    "condition_as_parse_action",
    "counted_array",
    "cpp_style_comment",
    "dbl_quoted_string",
    "dbl_slash_comment",
    "delimited_list",
    "dict_of",
    "empty",
    "hexnums",
    "html_comment",
    "identchars",
    "identbodychars",
    "infix_notation",
    "java_style_comment",
    "line",
    "line_end",
    "line_start",
    "lineno",
    "make_html_tags",
    "make_xml_tags",
    "match_only_at_col",
    "match_previous_expr",
    "match_previous_literal",
    "nested_expr",
    "null_debug_action",
    "nums",
    "one_of",
    "original_text_for",
    "printables",
    "punc8bit",
    "pyparsing_common",
    "pyparsing_test",
    "pyparsing_unicode",
    "python_style_comment",
    "quoted_string",
    "remove_quotes",
    "replace_with",
    "replace_html_entity",
    "rest_of_line",
    "sgl_quoted_string",
    "show_best_practices",
    "srange",
    "string_end",
    "string_start",
    "token_map",
    "trace_parse_action",
    "ungroup",
    "unicode_set",
    "unicode_string",
    "with_attribute",
    "with_class",
    # pre-PEP8 compatibility names
    "__versionTime__",
    "anyCloseTag",
    "anyOpenTag",
    "cStyleComment",
    "commonHTMLEntity",
    "conditionAsParseAction",
    "countedArray",
    "cppStyleComment",
    "dblQuotedString",
    "dblSlashComment",
    "delimitedList",
    "dictOf",
    "htmlComment",
    "indentedBlock",
    "infixNotation",
    "javaStyleComment",
    "lineEnd",
    "lineStart",
    "locatedExpr",
    "makeHTMLTags",
    "makeXMLTags",
    "matchOnlyAtCol",
    "matchPreviousExpr",
    "matchPreviousLiteral",
    "nestedExpr",
    "nullDebugAction",
    "oneOf",
    "opAssoc",
    "originalTextFor",
    "pythonStyleComment",
    "quotedString",
    "removeQuotes",
    "replaceHTMLEntity",
    "replaceWith",
    "restOfLine",
    "sglQuotedString",
    "stringEnd",
    "stringStart",
    "tokenMap",
    "traceParseAction",
    "unicodeString",
    "withAttribute",
    "withClass",
    "common",
    "unicode",
    "testing",
]