diff --git a/.gitignore b/.gitignore index 8a6468e..57fb45b 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ __pycache__/ .eggs/ .pytest_cache/ .DS_Store +.venv/ diff --git a/README.md b/README.md index 46e89fa..6457122 100644 --- a/README.md +++ b/README.md @@ -3,4 +3,116 @@ A plugin for mkdocs that allows some advanced 'includes' functionality to be used for embedded code blocks. This is effectively an extended Markdown format, but is intended to degrade gracefully when rendered with a different renderer. -This README will be extended to include examples at a later date. +## Installation + +1. Add dependency on the plugin: + +```requirements.txt +-e git+https://github.com/rnorth/mkdocs-codeinclude-plugin#egg=mkdocs_codeinclude_plugin +``` + +You have to use a Git dependency specification until the plugin is published on PyPI. + +2. Add `codeinclude` to the list of your MkDocs plugins (typically listed in `mkdocs.yml`): + +```yaml +plugins: + - codeinclude +``` + +## Usage + +A codeinclude block resembles a regular markdown link surrounded by a pair of XML comments, e.g.: + + + +
<!--codeinclude-->
+[Human readable title for snippet](./relative_path_to_example_code.java) targeting_expression
+<!--/codeinclude-->
+
+ +Where `targeting_expression` could be: + +* `block:someString` or +* `inside_block:someString` + +If these are provided, the macro will seek out any line containing the token `someString` and grab the next curly brace +delimited block that it finds. `block` will grab the starting line and closing brace, whereas `inside_block` will omit +these. If no `targeting_expression` is provided, the whole file is included. + +e.g., given: +```java + +public class FooService { + + public void doFoo() { + foo.doSomething(); + } + +} +``` + +If we use `block:doFoo` as our targeting expression, we will have the following content included into our page: + +```java +public void doFoo() { + foo.doSomething(); +} +``` + +Whereas using `inside_block:doFoo` we would just have the inner content of the method included: + +```java +foo.doSomething(); +``` + +Note that: + +* Any code included will have its indentation reduced +* Every line in the source file will be searched for an instance of the token (e.g. `doFoo`). If more than one line + includes that token, then potentially more than one block could be targeted for inclusion. It is advisable to use a + specific, unique token to avoid unexpected behaviour. + +When we wish to include a section of code that does not naturally appear within braces, we can simply insert our token, +with matching braces, in a comment. +While a little ugly, this has the benefit of working in any context, even in languages that do not use +curly braces, and is easy to understand. 
+For example: + +```java +public class FooService { + + public void boringMethod() { + doSomethingBoring(); + + // doFoo { + doTheThingThatWeActuallyWantToShow(); + // } + } + +} +``` + +will be rendered as: + +```java +doTheThingThatWeActuallyWantToShow(); +``` + +## Building the Project + +Install the dependencies: + +```shell +pip install -r requirements.txt +pip install nose # Optionally, install nose to run the tests +``` + +Run the tests: +```shell +nosetests +``` diff --git a/codeinclude/languages.py b/codeinclude/languages.py new file mode 100644 index 0000000..8862ff3 --- /dev/null +++ b/codeinclude/languages.py @@ -0,0 +1,20 @@ +from pygments.lexers import get_lexer_for_filename +from pygments.util import ClassNotFound + + +def get_lang_class(filename: str) -> str: + """Returns the Pygments _language alias_ for the filename. + + Pygments is used by codehilite, a widely used extension for code highlighting: + https://squidfunk.github.io/mkdocs-material/extensions/codehilite/ + + The Pygments language aliases are expected to be compatible with highlight.js language classes, + which are used by some MkDocs themes: https://www.mkdocs.org/user-guide/styling-your-docs/#built-in-themes + For a table of 'Language -> Language Classes' in _highlight.js_, + see https://github.com/highlightjs/highlight.js#supported-languages + """ + try: + lexer = get_lexer_for_filename(filename) + return lexer.aliases[0] + except ClassNotFound: + return "none" diff --git a/codeinclude/plugin.py b/codeinclude/plugin.py index b0f3931..9f1eb91 100644 --- a/codeinclude/plugin.py +++ b/codeinclude/plugin.py @@ -2,9 +2,12 @@ import os import shlex import textwrap +from dataclasses import dataclass +from typing import List from mkdocs.plugins import BasePlugin from codeinclude.resolver import select +from codeinclude.languages import get_lang_class RE_START = r"""(?x) ^ @@ -22,21 +25,104 @@ $ """ -RE_SNIPPET = r"""(?x) +RE_SNIPPET = r"""(?xm) ^ (?P\s*) \[(?P[^\]]*)\]\((?P<filename>[^)]+)\) 
- ([\t ]+(?P<params>.*))? + ([\t\n ]+(?P<params>[\w:-]+))? (?P<ignored_trailing_space>\s*) $ """ -def get_substitute(page, title, filename, lines, block, inside_block): +class CodeIncludePlugin(BasePlugin): + def on_page_markdown(self, markdown, page, config, site_navigation=None, **kwargs): + "Provide a hook for defining functions from an external module" + blocks = find_code_include_blocks(markdown) + substitutes = get_substitutes(blocks, page) + return substitute(markdown, substitutes) + + +@dataclass +class CodeIncludeBlock(object): + first_line_index: int + last_line_index: int + content: str + + +def find_code_include_blocks(markdown: str) -> List[CodeIncludeBlock]: + ci_blocks = list() + first = -1 + in_block = False + lines = markdown.splitlines() + for index, line in enumerate(lines): + if re.match(RE_START, lines[index]): + if in_block: + raise ValueError(f"Found two consecutive code-include starts: at lines {first} and {index}") + first = index + in_block = True + elif re.match(RE_END, lines[index]): + if not in_block: + raise ValueError(f"Found code-include end without preceding start at line {index}") + last = index + content = '\n'.join(lines[first:last + 1]) + ci_blocks.append(CodeIncludeBlock(first, last, content)) + in_block = False + return ci_blocks + + +@dataclass +class Replacement(object): + first_line_index: int + last_line_index: int + content: str + + +def get_substitutes(blocks: List[CodeIncludeBlock], page) -> List[Replacement]: + replacements = list() + for ci_block in blocks: + replacement_content = "" + for snippet_match in re.finditer(RE_SNIPPET, ci_block.content): + title = snippet_match.group("title") + filename = snippet_match.group("filename") + indent = snippet_match.group("leading_space") + raw_params = snippet_match.group("params") + + if raw_params: + params = dict(token.split(":") for token in shlex.split(raw_params)) + lines = params.get("lines", "") + block = params.get("block", "") + inside_block = 
params.get("inside_block", "") + else: + lines = "" + block = "" + inside_block = "" + + code_block = get_substitute( + page, title, filename, lines, block, inside_block + ) + # re-indent + code_block = re.sub("^", indent, code_block, flags=re.MULTILINE) + + replacement_content += code_block + replacements.append(Replacement(ci_block.first_line_index, ci_block.last_line_index, replacement_content)) + return replacements + + +def get_substitute(page, title, filename, lines, block, inside_block): + # Compute the fence header + lang_code = get_lang_class(filename) + header = lang_code + title = title.strip() + if len(title) > 0: + header += f' tab="{title}"' + + # Select the code content page_parent_dir = os.path.dirname(page.file.abs_src_path) import_path = os.path.join(page_parent_dir, filename) - with open(import_path) as f: + # Always use UTF-8, as it is the recommended default for source file encodings. + with open(import_path, encoding='UTF-8') as f: content = f.read() selected_content = select( @@ -45,56 +131,32 @@ def get_substitute(page, title, filename, lines, block, inside_block): dedented = textwrap.dedent(selected_content) - return '\n```java tab="' + title + '"\n' + dedented + "\n```\n\n" - - -class CodeIncludePlugin(BasePlugin): - def on_page_markdown(self, markdown, page, config, site_navigation=None, **kwargs): - "Provide a hook for defining functions from an external module" - - active = False - results = "" - for line in markdown.splitlines(): - boundary = False - - # detect end - if active and re.match(RE_END, line): - active = False - boundary = True - - # handle each line of a codeinclude zone - if active: - snippet_match = re.match(RE_SNIPPET, line) - if snippet_match: - title = snippet_match.group("title") - filename = snippet_match.group("filename") - indent = snippet_match.group("leading_space") - raw_params = snippet_match.group("params") - - if raw_params: - params = dict(token.split(":") for token in shlex.split(raw_params)) - lines = 
params.get("lines", "") - block = params.get("block", "") - inside_block = params.get("inside_block", "") - else: - lines = "" - block = "" - inside_block = "" - - code_block = get_substitute( - page, title, filename, lines, block, inside_block - ) - # re-indent - code_block = re.sub("^", indent, code_block, flags=re.MULTILINE) - results += code_block - - # detect start - if re.match(RE_START, line): - active = True - boundary = True - - # outside a codeinclude zone and ignoring the boundaries - if not active and not boundary: - results += line + "\n" - - return results + return f''' +```{header} +{dedented} +``` + +''' + + +def substitute(markdown: str, substitutes: List[Replacement]) -> str: + substitutes_by_first_line = dict() + # Index substitutes by the first line + for s in substitutes: + substitutes_by_first_line[s.first_line_index] = s + + # Perform substitutions + result = "" + index = 0 + lines = markdown.splitlines() + while index < len(lines): + if index in substitutes_by_first_line.keys(): + # Replace the codeinclude fragment starting at this line + substitute = substitutes_by_first_line[index] + result += substitute.content + index = substitute.last_line_index + else: + # Keep the input line + result += lines[index] + "\n" + index += 1 + return result diff --git a/codeinclude/resolver.py b/codeinclude/resolver.py index b1610c9..0a1bb13 100644 --- a/codeinclude/resolver.py +++ b/codeinclude/resolver.py @@ -43,21 +43,31 @@ def select( delim_count -= line.count("}") if inside_block: - i = 0 delim_count = 0 - for line in text.splitlines(): + inside_matching = False + for line_number, line in enumerate(text.splitlines(), start=1): first_line_of_block = False - i = i + 1 + # Detect the block beginning if inside_block in line and delim_count <= 0: delim_count = 0 first_line_of_block = True - delim_count += line.count("{") + inside_matching = True + # Don't process lines that are outside the matching block + if not inside_matching: + continue + + # Count the 
brackets in the line + delim_count += line.count("{") delim_count -= line.count("}") - if delim_count > 0 and not first_line_of_block: - delim_count += line.count("{") - selected_lines.append(i) + # If we closed the opening bracket (= dropped below 0), the matching block has ended + if delim_count <= 0: + inside_matching = False + + # Append the lines inside the matching block, skipping the first matching + if inside_matching and not first_line_of_block: + selected_lines.append(line_number) if from_token and to_token: i = 0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ecf975e --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +-e . \ No newline at end of file diff --git a/setup.py b/setup.py index e56283a..f3fdd10 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ def read_file(fname): python_requires='>=3.6', install_requires=[ 'mkdocs>=0.17', - 'mkdocs' + 'pygments' ], classifiers=[ 'Development Status :: 5 - Production/Stable', diff --git a/tests/codeinclude/fixture/Bar.java b/tests/codeinclude/fixture/Bar.java index a55f182..e81ef31 100644 --- a/tests/codeinclude/fixture/Bar.java +++ b/tests/codeinclude/fixture/Bar.java @@ -1,3 +1,3 @@ public class Bar { - + // This UTF-8 encoded file has some multi-byte characters: œ, ë } \ No newline at end of file diff --git a/tests/codeinclude/fixture/Curly.java b/tests/codeinclude/fixture/Curly.java new file mode 100644 index 0000000..04101d4 --- /dev/null +++ b/tests/codeinclude/fixture/Curly.java @@ -0,0 +1,3 @@ +public class Curly { + public static String RIGHT_CURLY_REGEX = "\\}"; +} \ No newline at end of file diff --git a/tests/codeinclude/test_languages.py b/tests/codeinclude/test_languages.py new file mode 100644 index 0000000..7479120 --- /dev/null +++ b/tests/codeinclude/test_languages.py @@ -0,0 +1,18 @@ +import unittest +from codeinclude.languages import get_lang_class + + +class MyTestCase(unittest.TestCase): + def test_get_lang_class(self): + 
self.assertEquals('java', get_lang_class('HelloWorld.java')) + self.assertEquals('python', get_lang_class('HelloWorld.py')) + self.assertEquals('csharp', get_lang_class('HelloWorld.cs')) + self.assertEquals('rust', get_lang_class('HelloWorld.rs')) + self.assertEquals('docker', get_lang_class('Dockerfile')) + self.assertEquals('xml', get_lang_class('HelloWorld.xml')) + self.assertEquals('toml', get_lang_class('HelloWorld.toml')) + self.assertEquals('json', get_lang_class('HelloWorld.json')) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/codeinclude/test_plugin.py b/tests/codeinclude/test_plugin.py index 54775f4..78aa01d 100644 --- a/tests/codeinclude/test_plugin.py +++ b/tests/codeinclude/test_plugin.py @@ -8,7 +8,14 @@ from codeinclude.plugin import CodeIncludePlugin -MARKDOWN_EXAMPLE = """ +MARKDOWN_EXAMPLE_NO_INCLUDES = """ +# hello world + +some text before + +""" + +MARKDOWN_EXAMPLE_NO_SELECTOR = """ # hello world some text before @@ -19,6 +26,29 @@ """ +MARKDOWN_EXAMPLE_SELECTOR_ON_SAME_LINE = """ +# hello world + +some text before +<!--codeinclude--> +[foo](Foo.java) lines:1 +<!--/codeinclude--> +and some text after + +""" + +MARKDOWN_EXAMPLE_SELECTOR_ON_NEXT_LINE = """ +# hello world + +some text before +<!--codeinclude--> +[foo](Foo.java) +lines:1 +<!--/codeinclude--> +and some text after + +""" + MULTI_TAB_MARKDOWN_EXAMPLE = """ # hello world @@ -31,17 +61,43 @@ """ +EMPTY_TITLE_MARKDOWN_EXAMPLE = """ +# hello world + +some text before +<!--codeinclude--> +[](Foo.java) +<!--/codeinclude--> +and some text after + +""" + +MARKDOWN_EXAMPLE_RIGHT_CURLY = """ +# hello world + +<!--codeinclude--> +[Curly](Curly.java) block:Curly +<!--/codeinclude--> +""" + c = Config(schema=DEFAULT_SCHEMA) c["site_url"] = "http://example.org/" -PAGE_EXAMPLE = Page("", File(os.path.abspath("./fixture/text.md"), "/src", "/dest", False), c) +PAGE_EXAMPLE = Page("", File(os.path.abspath("./tests/codeinclude/fixture/text.md"), "/src", "/dest", False), c) class 
PluginTextCase(unittest.TestCase): - def test_simple_case(self): + def test_no_includes(self): plugin = CodeIncludePlugin() - result = plugin.on_page_markdown(MARKDOWN_EXAMPLE, PAGE_EXAMPLE, dict()) + result = plugin.on_page_markdown(MARKDOWN_EXAMPLE_NO_INCLUDES, PAGE_EXAMPLE, dict()) + + self.assertEqual(MARKDOWN_EXAMPLE_NO_INCLUDES.strip(), + result.strip()) + + def test_simple_case_no_selector(self): + plugin = CodeIncludePlugin() + result = plugin.on_page_markdown(MARKDOWN_EXAMPLE_NO_SELECTOR, PAGE_EXAMPLE, dict()) print(result) self.assertEqual(textwrap.dedent(""" @@ -59,6 +115,63 @@ def test_simple_case(self): """).strip(), result.strip()) + @unittest.skip("https://github.com/rnorth/mkdocs-codeinclude-plugin/issues/13") + def test_simple_case_right_curly_inside_block(self): + plugin = CodeIncludePlugin() + result = plugin.on_page_markdown(MARKDOWN_EXAMPLE_RIGHT_CURLY, PAGE_EXAMPLE, dict()) + + print(result) + self.assertEqual(textwrap.dedent(r""" + # hello world + + + ```java tab="Curly" + public class Curly { + public static String RIGHT_CURLY_REGEX = "\\}"; + } + + ``` + """).strip(), + result.strip()) + + def test_simple_case_selector_on_same_line(self): + plugin = CodeIncludePlugin() + result = plugin.on_page_markdown(MARKDOWN_EXAMPLE_SELECTOR_ON_SAME_LINE, PAGE_EXAMPLE, dict()) + + print(result) + self.assertEqual(textwrap.dedent(""" + # hello world + + some text before + + ```java tab=\"foo\" + public class Foo { + + ``` + + and some text after + """).strip(), + result.strip()) + + def test_simple_case_selector_on_next_line(self): + plugin = CodeIncludePlugin() + result = plugin.on_page_markdown(MARKDOWN_EXAMPLE_SELECTOR_ON_NEXT_LINE, PAGE_EXAMPLE, dict()) + + print(result) + self.assertEqual(textwrap.dedent(""" + # hello world + + some text before + + ```java tab=\"foo\" + public class Foo { + + ``` + + and some text after + """).strip(), + result.strip()) + def test_multi_tab_case(self): plugin = CodeIncludePlugin() result = 
plugin.on_page_markdown(MULTI_TAB_MARKDOWN_EXAMPLE, PAGE_EXAMPLE, dict()) @@ -78,6 +191,26 @@ def test_multi_tab_case(self): ```java tab=\"bar\" public class Bar { + // This UTF-8 encoded file has some multi-byte characters: œ, ë + } + ``` + + and some text after + """).strip(), + result.strip()) + + def test_empty_title_case(self): + plugin = CodeIncludePlugin() + result = plugin.on_page_markdown(EMPTY_TITLE_MARKDOWN_EXAMPLE, PAGE_EXAMPLE, dict()) + + print(result) + self.assertEqual(textwrap.dedent(""" + # hello world + + some text before + + ```java + public class Foo { } ``` diff --git a/tests/codeinclude/test_resolver.py b/tests/codeinclude/test_resolver.py index 25a7d6b..f9615e2 100644 --- a/tests/codeinclude/test_resolver.py +++ b/tests/codeinclude/test_resolver.py @@ -1,3 +1,6 @@ +import textwrap +import unittest + from codeinclude.resolver import select CODE_BLOCK_EXAMPLE = """ @@ -8,20 +11,202 @@ this is a trailing line """ -def test_lines(): - result = select(CODE_BLOCK_EXAMPLE, lines="2,6") - assert result == ("this is the first line\n" - "\n" - "⋯\n" - "\n" - "this is a trailing line\n") - -def test_inside_block(): - result = select(CODE_BLOCK_EXAMPLE, inside_block="blockstarter") - assert result == " block content\n" - -def test_whole_block(): - result = select(CODE_BLOCK_EXAMPLE, block="blockstarter") - assert result == ("blockstarter {\n" - " block content\n" - "}\n") \ No newline at end of file + +class ResolverTest(unittest.TestCase): + def test_lines(self): + result = select(CODE_BLOCK_EXAMPLE, lines="2,6") + self.assertEquals(("this is the first line\n" + "\n" + "⋯\n" + "\n" + "this is a trailing line\n"), + result) + + def test_inside_block(self): + result = select(CODE_BLOCK_EXAMPLE, inside_block="blockstarter") + self.assertEquals(" block content\n", result) + + def test_whole_block(self): + result = select(CODE_BLOCK_EXAMPLE, block="blockstarter") + self.assertEquals(("blockstarter {\n" + " block content\n" + "}\n"), + result) + + def 
test_block_curly_on_same_line(self): + result = select( + textwrap.dedent( + """ + /* Before foo */ + foo { + /* {} {@code Bar} */ + } + /* After foo */ + """), + block="foo") + self.assertEquals(("foo {\n" + " /* {} {@code Bar} */\n" + "}\n"), + result) + + def test_inside_block_content_on_last_line(self): + result = select( + textwrap.dedent( + """ + foo { + if (true) { + bar(); + } } + /* The line above contains both the closing curly bracket for `if` and for `foo` */ + """), + inside_block="foo") + self.assertEquals((" if (true) {\n" + " bar();\n"), + result) + + def test_inside_block_curly_on_same_line(self): + result = select( + textwrap.dedent( + """ + foo { + /* {} */ + } + """), + inside_block="foo") + self.assertEquals(" /* {} */\n", result) + + def test_inside_block_multiple_curly_on_same_line(self): + result = select( + textwrap.dedent( + """ + // + foo { + /* {} {@code bar} {@link baz} */ + } + """), + inside_block="foo") + self.assertEquals(" /* {} {@code bar} {@link baz} */\n", result) + + def test_inside_block_in_a_block(self): + result = select( + textwrap.dedent( + """ + {{{ + foo { + /* inside foo */ + } + }}} + """), + inside_block="foo") + self.assertEquals(" /* inside foo */\n", result) + + def test_inside_block_contains_keyword(self): + result = select( + textwrap.dedent( + """ + /* Some code before {} */ + first { + /* first */ + first(); + if (first()) { + first(); + } else { + first(); + } + } + /* Some code after {} */ + """), + inside_block="first") + self.maxDiff = None + self.assertEquals( +""" /* first */ + first(); + if (first()) { + first(); + } else { + first(); + } +""", + result) + + def test_inside_block_nested_matching_blocks(self): + result = select( + textwrap.dedent( + """ + /* Some code before {} */ + first { + first { + first { + /* The most deeply nested. */ + } + } + } + /* Some code after {} */ + """), + inside_block="first") + self.maxDiff = None + self.assertEquals( +""" first { + first { + /* The most deeply nested. 
*/ + } + } +""", + result) + + def test_inside_block_multiple_blocks_first(self): + result = select( + textwrap.dedent( + """ + /* Some code before {} */ + first { + /* inside first */ + } + /* Some code in between */ + second { + /* inside second */ + } + /* Some code after {} */ + """), + inside_block="first") + self.maxDiff = None + self.assertEquals(" /* inside first */\n", result) + + def test_inside_block_multiple_blocks_second(self): + result = select( + textwrap.dedent( + """ + /* Some code before {} */ + first { + /* inside first */ + } + /* Some code in between */ + second { + /* inside second */ + } + /* Some code after {} */ + """), + inside_block="second") + self.maxDiff = None + self.assertEquals(" /* inside second */\n", result) + + def test_inside_block_several_matching_blocks(self): + result = select( + textwrap.dedent( + """ + /* Some code before {} */ + matching_block 1 { + /* inside first */ + } + /* Some code in between */ + matching_block 2 { + /* inside second */ + } + /* Some code after {} */ + """), + inside_block="matching_block") + self.maxDiff = None + self.assertEquals((" /* inside first */\n" + "\n⋯\n\n" + " /* inside second */\n"), + result)