Source code for myst_parser.docutils_

"""A module for compatibility with the docutils>=0.17 `include` directive, in RST documents::

   .. include:: path/to/file.md
      :parser: myst_parser.docutils_
"""
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union

from attr import Attribute
from docutils import frontend, nodes
from docutils.core import default_description, publish_cmdline
from docutils.parsers.rst import Parser as RstParser
from markdown_it.token import Token
from typing_extensions import Literal, get_args, get_origin

from myst_parser.docutils_renderer import DocutilsRenderer
from myst_parser.main import MdParserConfig, create_md_parser


def _validate_int(
    setting, value, option_parser, config_parser=None, config_section=None
) -> int:
    """Validate an integer setting."""
    return int(value)


def _create_validate_tuple(length: int) -> Callable[..., Tuple[str, ...]]:
    """Create a validator for a tuple of length `length`."""

    def _validate(
        setting, value, option_parser, config_parser=None, config_section=None
    ):
        string_list = frontend.validate_comma_separated_list(
            setting, value, option_parser, config_parser, config_section
        )
        if len(string_list) != length:
            raise ValueError(
                f"Expecting {length} items in {setting}, got {len(string_list)}."
            )
        return tuple(string_list)

    return _validate


class Unset:
    """A sentinel class for unset settings."""

    def __repr__(self):
        return "UNSET"


DOCUTILS_UNSET = Unset()
"""Sentinel for arguments not set through docutils.conf."""


DOCUTILS_EXCLUDED_ARGS = (
    # docutils.conf can't represent callables
    "heading_slug_func",
    # docutils.conf can't represent dicts
    "html_meta",
    "substitutions",
    # we can't add substitutions so not needed
    "sub_delimiters",
    # sphinx only options
    "heading_anchors",
    "ref_domains",
    "update_mathjax",
    "mathjax_classes",
)
"""Names of settings that cannot be set in docutils.conf."""


def _attr_to_optparse_option(at: Attribute, default: Any) -> Tuple[dict, str]:
    """Convert an ``attrs.Attribute`` into a Docutils optparse options dict."""
    if at.type is int:
        return {"metavar": "<int>", "validator": _validate_int}, f"(default: {default})"
    if at.type is bool:
        return {
            "metavar": "<boolean>",
            "validator": frontend.validate_boolean,
        }, f"(default: {default})"
    if at.type is str:
        return {
            "metavar": "<str>",
        }, f"(default: '{default}')"
    if get_origin(at.type) is Literal and all(
        isinstance(a, str) for a in get_args(at.type)
    ):
        args = get_args(at.type)
        return {
            "metavar": f"<{'|'.join(repr(a) for a in args)}>",
            "type": "choice",
            "choices": args,
        }, f"(default: {default!r})"
    if at.type in (Iterable[str], Sequence[str]):
        return {
            "metavar": "<comma-delimited>",
            "validator": frontend.validate_comma_separated_list,
        }, f"(default: '{','.join(default)}')"
    if at.type == Tuple[str, str]:
        return {
            "metavar": "<str,str>",
            "validator": _create_validate_tuple(2),
        }, f"(default: '{','.join(default)}')"
    if at.type == Union[int, type(None)]:
        return {
            "metavar": "<null|int>",
            "validator": _validate_int,
        }, f"(default: {default})"
    if at.type == Union[Iterable[str], type(None)]:
        default_str = ",".join(default) if default else ""
        return {
            "metavar": "<null|comma-delimited>",
            "validator": frontend.validate_comma_separated_list,
        }, f"(default: {default_str!r})"
    raise AssertionError(
        f"Configuration option {at.name} not set up for use in docutils.conf."
    )


def attr_to_optparse_option(
    attribute: Attribute, default: Any, prefix: str = "myst_"
) -> Tuple[str, List[str], Dict[str, Any]]:
    """Convert an ``MdParserConfig`` attribute into a Docutils setting tuple.

    :returns: A tuple of ``(help string, option flags, optparse kwargs)``.
    """
    name = f"{prefix}{attribute.name}"
    flag = "--" + name.replace("_", "-")
    options = {"dest": name, "default": DOCUTILS_UNSET}
    at_options, type_str = _attr_to_optparse_option(attribute, default)
    options.update(at_options)
    help_str = attribute.metadata.get("help", "") if attribute.metadata else ""
    return (f"{help_str} {type_str}", [flag], options)


def create_myst_settings_spec(
    excluded: Sequence[str], config_cls=MdParserConfig, prefix: str = "myst_"
):
    """Return a list of Docutils setting for the docutils MyST section."""
    defaults = config_cls()
    return tuple(
        attr_to_optparse_option(at, getattr(defaults, at.name), prefix)
        for at in config_cls.get_fields()
        if at.name not in excluded
    )


def create_myst_config(
    settings: frontend.Values,
    excluded: Sequence[str],
    config_cls=MdParserConfig,
    prefix: str = "myst_",
):
    """Create a configuration instance from the given settings."""
    values = {}
    for attribute in config_cls.get_fields():
        if attribute.name in excluded:
            continue
        setting = f"{prefix}{attribute.name}"
        val = getattr(settings, setting, DOCUTILS_UNSET)
        if val is not DOCUTILS_UNSET:
            values[attribute.name] = val
    return config_cls(**values)


[docs]class Parser(RstParser): """Docutils parser for Markedly Structured Text (MyST).""" supported: Tuple[str, ...] = ("md", "markdown", "myst") """Aliases this parser supports.""" settings_spec = ( "MyST options", None, create_myst_settings_spec(DOCUTILS_EXCLUDED_ARGS), *RstParser.settings_spec, ) """Runtime settings specification.""" config_section = "myst parser" config_section_dependencies = ("parsers",) translate_section_name = None
[docs] def parse(self, inputstring: str, document: nodes.document) -> None: """Parse source text. :param inputstring: The source string to parse :param document: The root docutils node to add AST elements to """ self.setup_parse(inputstring, document) # check for exorbitantly long lines if hasattr(document.settings, "line_length_limit"): for i, line in enumerate(inputstring.split("\n")): if len(line) > document.settings.line_length_limit: error = document.reporter.error( f"Line {i+1} exceeds the line-length-limit:" f" {document.settings.line_length_limit}." ) document.append(error) return # create parsing configuration try: config = create_myst_config(document.settings, DOCUTILS_EXCLUDED_ARGS) except Exception as exc: error = document.reporter.error(f"myst configuration invalid: {exc}") document.append(error) config = MdParserConfig() # parse content parser = create_md_parser(config, DocutilsRenderer) parser.options["document"] = document env: dict = {} tokens = parser.parse(inputstring, env) if not tokens or tokens[0].type != "front_matter": # we always add front matter, so that we can merge it with global keys, # specified in the sphinx configuration tokens = [Token("front_matter", "", 0, content="{}", map=[0, 0])] + tokens parser.renderer.render(tokens, parser.options, env) # post-processing # replace raw nodes if raw is not allowed if not getattr(document.settings, "raw_enabled", True): for node in document.traverse(nodes.raw): warning = document.reporter.warning("Raw content disabled.") node.parent.replace(node, warning) self.finish_parse()
def _run_cli(writer_name: str, writer_description: str, argv: Optional[List[str]]): """Run the command line interface for a particular writer.""" publish_cmdline( parser=Parser(), writer_name=writer_name, description=( f"Generates {writer_description} from standalone MyST sources.\n{default_description}" ), argv=argv, ) def cli_html(argv: Optional[List[str]] = None) -> None: """Cmdline entrypoint for converting MyST to HTML.""" _run_cli("html", "(X)HTML documents", argv) def cli_html5(argv: Optional[List[str]] = None): """Cmdline entrypoint for converting MyST to HTML5.""" _run_cli("html5", "HTML5 documents", argv) def cli_latex(argv: Optional[List[str]] = None): """Cmdline entrypoint for converting MyST to LaTeX.""" _run_cli("latex", "LaTeX documents", argv) def cli_xml(argv: Optional[List[str]] = None): """Cmdline entrypoint for converting MyST to XML.""" _run_cli("xml", "Docutils-native XML", argv) def cli_pseudoxml(argv: Optional[List[str]] = None): """Cmdline entrypoint for converting MyST to pseudo-XML.""" _run_cli("pseudoxml", "pseudo-XML", argv)