diff --git a/CHANGELOG.md b/CHANGELOG.md index df6beb202..2fc29ab3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html with the exception that 0.x versions can break between minor versions. ## Unreleased +### Added +- Support for extending inline parsing with custom inline content parsers! See + `Parser.Builder#customInlineContentParserFactory`. This allows users or + extensions to hook into inline parsing on a deeper level than using delimiter + processors. It could be used to implement support for math/latex formulas for + example. ### Fixed - Fix parsing of link reference definitions where it looks like it has a title but it doesn't because it's followed by characters other than space/tab. In that diff --git a/README.md b/README.md index e4b07bfdc..bcf587f54 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,19 @@ elements in the resulting HTML, you can create your own subclass of To define the HTML rendering for them, you can use a `NodeRenderer` as explained above. +#### Customize parsing + +There are a few ways to extend parsing or even override built-in parsing, +all of them via methods on `Parser.Builder` +(see [Blocks and inlines](https://spec.commonmark.org/0.31.2/#blocks-and-inlines) in the spec for an overview of blocks/inlines): + +- Parsing of specific block types (e.g. 
headings, code blocks, etc) can be + enabled/disabled with `enabledBlockTypes` +- Parsing of blocks can be extended/overridden with `customBlockParserFactory` +- Parsing of inline content can be extended/overridden with `customInlineContentParserFactory` +- Parsing of [delimiters](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis) in inline content can be + extended with `customDelimiterProcessor` + #### Thread-safety Both the `Parser` and `HtmlRenderer` are designed so that you can diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index 2cc37e306..6884c56a9 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -1,5 +1,6 @@ package org.commonmark.internal; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.internal.util.Parsing; import org.commonmark.node.*; import org.commonmark.parser.*; @@ -66,6 +67,7 @@ public class DocumentParser implements ParserState { private final List blockParserFactories; private final InlineParserFactory inlineParserFactory; + private final List inlineContentParserFactories; private final List delimiterProcessors; private final IncludeSourceSpans includeSourceSpans; private final DocumentBlockParser documentBlockParser; @@ -75,9 +77,11 @@ public class DocumentParser implements ParserState { private final List allBlockParsers = new ArrayList<>(); public DocumentParser(List blockParserFactories, InlineParserFactory inlineParserFactory, - List delimiterProcessors, IncludeSourceSpans includeSourceSpans) { + List inlineContentParserFactories, List delimiterProcessors, + IncludeSourceSpans includeSourceSpans) { this.blockParserFactories = blockParserFactories; this.inlineParserFactory = inlineParserFactory; + this.inlineContentParserFactories = inlineContentParserFactories; 
this.delimiterProcessors = delimiterProcessors; this.includeSourceSpans = includeSourceSpans; @@ -477,7 +481,7 @@ private void addDefinitionsFrom(ParagraphParser paragraphParser) { * Walk through a block & children recursively, parsing string content into inline content where appropriate. */ private void processInlines() { - InlineParserContextImpl context = new InlineParserContextImpl(delimiterProcessors, definitions); + InlineParserContextImpl context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, definitions); InlineParser inlineParser = inlineParserFactory.create(context); for (BlockParser blockParser : allBlockParsers) { diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java index f485614d5..689a5372e 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java @@ -1,23 +1,31 @@ package org.commonmark.internal; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.delimiter.DelimiterProcessor; import java.util.List; -import java.util.Map; public class InlineParserContextImpl implements InlineParserContext { + private final List inlineContentParserFactories; private final List delimiterProcessors; private final LinkReferenceDefinitions linkReferenceDefinitions; - public InlineParserContextImpl(List delimiterProcessors, + public InlineParserContextImpl(List inlineContentParserFactories, + List delimiterProcessors, LinkReferenceDefinitions linkReferenceDefinitions) { + this.inlineContentParserFactories = inlineContentParserFactories; this.delimiterProcessors = delimiterProcessors; this.linkReferenceDefinitions = linkReferenceDefinitions; } + @Override + public List 
getCustomInlineContentParserFactories() { + return inlineContentParserFactories; + } + @Override public List getCustomDelimiterProcessors() { return delimiterProcessors; diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java index 113e80db9..5b91a5a16 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java @@ -7,7 +7,7 @@ import org.commonmark.parser.InlineParser; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.SourceLines; -import org.commonmark.parser.beta.Position; +import org.commonmark.parser.beta.*; import org.commonmark.parser.beta.Scanner; import org.commonmark.parser.delimiter.DelimiterProcessor; import org.commonmark.text.Characters; @@ -16,11 +16,12 @@ public class InlineParserImpl implements InlineParser, InlineParserState { - private final BitSet specialCharacters; - private final Map delimiterProcessors; private final InlineParserContext context; - private final Map> inlineParsers; + private final List inlineContentParserFactories; + private final Map delimiterProcessors; + private final BitSet specialCharacters; + private Map> inlineParsers; private Scanner scanner; private boolean includeSourceSpans; private int trailingSpaces; @@ -36,46 +37,31 @@ public class InlineParserImpl implements InlineParser, InlineParserState { */ private Bracket lastBracket; - public InlineParserImpl(InlineParserContext inlineParserContext) { - this.delimiterProcessors = calculateDelimiterProcessors(inlineParserContext.getCustomDelimiterProcessors()); - - this.context = inlineParserContext; - this.inlineParsers = new HashMap<>(); - this.inlineParsers.put('\\', Collections.singletonList(new BackslashInlineParser())); - this.inlineParsers.put('`', Collections.singletonList(new BackticksInlineParser())); - this.inlineParsers.put('&', 
Collections.singletonList(new EntityInlineParser())); - this.inlineParsers.put('<', Arrays.asList(new AutolinkInlineParser(), new HtmlInlineParser())); - - this.specialCharacters = calculateSpecialCharacters(this.delimiterProcessors.keySet(), inlineParsers.keySet()); + public InlineParserImpl(InlineParserContext context) { + this.context = context; + this.inlineContentParserFactories = calculateInlineContentParserFactories(context.getCustomInlineContentParserFactories()); + this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors()); + this.specialCharacters = calculateSpecialCharacters(this.delimiterProcessors.keySet(), this.inlineContentParserFactories); } - public static BitSet calculateSpecialCharacters(Set delimiterCharacters, Set characters) { - BitSet bitSet = new BitSet(); - for (Character c : delimiterCharacters) { - bitSet.set(c); - } - for (Character c : characters) { - bitSet.set(c); - } - bitSet.set('['); - bitSet.set(']'); - bitSet.set('!'); - bitSet.set('\n'); - return bitSet; + private List calculateInlineContentParserFactories(List customFactories) { + // Custom parsers can override built-in parsers if they want, so make sure they are tried first + var list = new ArrayList<>(customFactories); + list.add(new BackslashInlineParser.Factory()); + list.add(new BackticksInlineParser.Factory()); + list.add(new EntityInlineParser.Factory()); + list.add(new AutolinkInlineParser.Factory()); + list.add(new HtmlInlineParser.Factory()); + return list; } - public static Map calculateDelimiterProcessors(List delimiterProcessors) { - Map map = new HashMap<>(); - addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); + private static Map calculateDelimiterProcessors(List delimiterProcessors) { + var map = new HashMap(); + addDelimiterProcessors(List.of(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); addDelimiterProcessors(delimiterProcessors, 
map); return map; } - @Override - public Scanner scanner() { - return scanner; - } - private static void addDelimiterProcessors(Iterable delimiterProcessors, Map map) { for (DelimiterProcessor delimiterProcessor : delimiterProcessors) { char opening = delimiterProcessor.getOpeningCharacter(); @@ -109,6 +95,40 @@ private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterPr } } + private static BitSet calculateSpecialCharacters(Set delimiterCharacters, + List inlineContentParserFactories) { + BitSet bitSet = new BitSet(); + for (Character c : delimiterCharacters) { + bitSet.set(c); + } + for (var factory : inlineContentParserFactories) { + for (var c : factory.getTriggerCharacters()) { + bitSet.set(c); + } + } + bitSet.set('['); + bitSet.set(']'); + bitSet.set('!'); + bitSet.set('\n'); + return bitSet; + } + + private Map> createInlineContentParsers() { + var map = new HashMap>(); + for (var factory : inlineContentParserFactories) { + var parser = factory.create(); + for (var c : factory.getTriggerCharacters()) { + map.computeIfAbsent(c, k -> new ArrayList<>()).add(parser); + } + } + return map; + } + + @Override + public Scanner scanner() { + return scanner; + } + /** * Parse content in block into inline children, appending them to the block node. 
*/ @@ -117,14 +137,13 @@ public void parse(SourceLines lines, Node block) { reset(lines); while (true) { - List nodes = parseInline(); - if (nodes != null) { - for (Node node : nodes) { - block.appendChild(node); - } - } else { + var nodes = parseInline(); + if (nodes == null) { break; } + for (Node node : nodes) { + block.appendChild(node); + } } processDelimiters(null); @@ -137,6 +156,7 @@ void reset(SourceLines lines) { this.trailingSpaces = 0; this.lastDelimiter = null; this.lastBracket = null; + this.inlineParsers = createInlineContentParsers(); } private Text text(SourceLines sourceLines) { @@ -155,20 +175,20 @@ private List parseInline() { switch (c) { case '[': - return Collections.singletonList(parseOpenBracket()); + return List.of(parseOpenBracket()); case '!': - return Collections.singletonList(parseBang()); + return List.of(parseBang()); case ']': - return Collections.singletonList(parseCloseBracket()); + return List.of(parseCloseBracket()); case '\n': - return Collections.singletonList(parseLineBreak()); + return List.of(parseLineBreak()); case Scanner.END: return null; } // No inline parser, delimiter or other special handling. if (!specialCharacters.get(c)) { - return Collections.singletonList(parseText()); + return List.of(parseText()); } List inlineParsers = this.inlineParsers.get(c); @@ -183,7 +203,7 @@ private List parseInline() { if (includeSourceSpans && node.getSourceSpans().isEmpty()) { node.setSourceSpans(scanner.getSource(position, scanner.position()).getSourceSpans()); } - return Collections.singletonList(node); + return List.of(node); } else { // Reset position scanner.setPosition(position); @@ -200,7 +220,7 @@ private List parseInline() { } // If we get here, even for a special/delimiter character, we will just treat it as text. 
- return Collections.singletonList(parseText()); + return List.of(parseText()); } /** diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java index 36c43e196..a18966e54 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java @@ -3,9 +3,9 @@ import org.commonmark.node.Link; import org.commonmark.node.Text; import org.commonmark.parser.SourceLines; -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; +import java.util.Set; import java.util.regex.Pattern; /** @@ -46,4 +46,16 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { } return ParsedInline.none(); } + + public static class Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('<'); + } + + @Override + public InlineContentParser create() { + return new AutolinkInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java index 02c136951..7baeed4de 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java @@ -3,8 +3,9 @@ import org.commonmark.internal.util.Escaping; import org.commonmark.node.HardLineBreak; import org.commonmark.node.Text; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; +import java.util.Set; import java.util.regex.Pattern; /** @@ -32,4 +33,16 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { return ParsedInline.of(new Text("\\"), scanner.position()); } } + + public static class 
Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('\\'); + } + + @Override + public InlineContentParser create() { + return new BackslashInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java index bef8e1f99..b8e8984e8 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java @@ -3,10 +3,11 @@ import org.commonmark.node.Code; import org.commonmark.node.Text; import org.commonmark.parser.SourceLines; -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; import org.commonmark.text.Characters; +import java.util.Set; + /** * Attempt to parse backticks, returning either a backtick code span or a literal sequence of backticks. 
*/ @@ -47,4 +48,16 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { Text text = new Text(source.getContent()); return ParsedInline.of(text, afterOpening); } + + public static class Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('`'); + } + + @Override + public InlineContentParser create() { + return new BackticksInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java index 2b7d296fb..c24e60747 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java @@ -1,13 +1,14 @@ package org.commonmark.internal.inline; -import org.commonmark.text.AsciiMatcher; import org.commonmark.internal.util.Html5Entities; import org.commonmark.node.Text; -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; +import org.commonmark.text.AsciiMatcher; + +import java.util.Set; /** - * Attempts to parse a HTML entity or numeric character reference. + * Attempts to parse an HTML entity or numeric character reference. 
*/ public class EntityInlineParser implements InlineContentParser { @@ -52,4 +53,17 @@ private ParsedInline entity(Scanner scanner, Position start) { String text = scanner.getSource(start, scanner.position()).getContent(); return ParsedInline.of(new Text(Html5Entities.entityToString(text)), scanner.position()); } + + public static class Factory implements InlineContentParserFactory { + + @Override + public Set getTriggerCharacters() { + return Set.of('&'); + } + + @Override + public InlineContentParser create() { + return new EntityInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java index 6dc525cb9..a48ea5022 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java @@ -1,9 +1,10 @@ package org.commonmark.internal.inline; -import org.commonmark.text.AsciiMatcher; import org.commonmark.node.HtmlInline; -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; +import org.commonmark.text.AsciiMatcher; + +import java.util.Set; /** * Attempt to parse inline HTML. 
@@ -200,4 +201,17 @@ private static boolean tryDeclaration(Scanner scanner) { } return false; } + + public static class Factory implements InlineContentParserFactory { + + @Override + public Set getTriggerCharacters() { + return Set.of('<'); + } + + @Override + public InlineContentParser create() { + return new HtmlInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java deleted file mode 100644 index 755ee3135..000000000 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java +++ /dev/null @@ -1,6 +0,0 @@ -package org.commonmark.internal.inline; - -public interface InlineContentParser { - - ParsedInline tryParse(InlineParserState inlineParserState); -} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInline.java b/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInline.java deleted file mode 100644 index 7223c1687..000000000 --- a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInline.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.commonmark.internal.inline; - -import org.commonmark.node.Node; -import org.commonmark.parser.beta.Position; - -public abstract class ParsedInline { - - protected ParsedInline() { - } - - public static ParsedInline none() { - return null; - } - - public static ParsedInline of(Node node, Position position) { - if (node == null) { - throw new NullPointerException("node must not be null"); - } - if (position == null) { - throw new NullPointerException("position must not be null"); - } - return new ParsedInlineImpl(node, position); - } -} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java b/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java index 55f9cc4da..a77630610 100644 --- 
a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java @@ -1,13 +1,14 @@ package org.commonmark.internal.inline; import org.commonmark.node.Node; +import org.commonmark.parser.beta.ParsedInline; import org.commonmark.parser.beta.Position; -public class ParsedInlineImpl extends ParsedInline { +public class ParsedInlineImpl implements ParsedInline { private final Node node; private final Position position; - ParsedInlineImpl(Node node, Position position) { + public ParsedInlineImpl(Node node, Position position) { this.node = node; this.position = position; } diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java index dae96e2c8..dde86b311 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java @@ -1,6 +1,7 @@ package org.commonmark.parser; import org.commonmark.node.LinkReferenceDefinition; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; import java.util.List; @@ -11,7 +12,14 @@ public interface InlineParserContext { /** - * @return custom delimiter processors that have been configured with {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)} + * @return custom inline content parsers that have been configured with + * {@link Parser.Builder#customInlineContentParserFactory(InlineContentParserFactory)} + */ + List getCustomInlineContentParserFactories(); + + /** + * @return custom delimiter processors that have been configured with + * {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)} */ List getCustomDelimiterProcessors(); diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java 
b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java index 34c384a8a..c1640e9d8 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java @@ -4,5 +4,9 @@ * Factory for custom inline parser. */ public interface InlineParserFactory { + + /** + * Create an {@link InlineParser} to use for parsing inlines. This is called once per parsed document. + */ InlineParser create(InlineParserContext inlineParserContext); } diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index 89cdd584c..febe05b7c 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -6,6 +6,7 @@ import org.commonmark.internal.InlineParserImpl; import org.commonmark.internal.LinkReferenceDefinitions; import org.commonmark.node.*; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.parser.block.BlockParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -13,6 +14,7 @@ import java.io.Reader; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Set; @@ -28,6 +30,7 @@ public class Parser { private final List blockParserFactories; + private final List inlineContentParserFactories; private final List delimiterProcessors; private final InlineParserFactory inlineParserFactory; private final List postProcessors; @@ -37,12 +40,13 @@ private Parser(Builder builder) { this.blockParserFactories = DocumentParser.calculateBlockParserFactories(builder.blockParserFactories, builder.enabledBlockTypes); this.inlineParserFactory = builder.getInlineParserFactory(); this.postProcessors = builder.postProcessors; + this.inlineContentParserFactories = builder.inlineContentParserFactories; this.delimiterProcessors = builder.delimiterProcessors; 
this.includeSourceSpans = builder.includeSourceSpans; // Try to construct an inline parser. Invalid configuration might result in an exception, which we want to // detect as soon as possible. - this.inlineParserFactory.create(new InlineParserContextImpl(delimiterProcessors, new LinkReferenceDefinitions())); + this.inlineParserFactory.create(new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, new LinkReferenceDefinitions())); } /** @@ -100,7 +104,7 @@ public Node parseReader(Reader input) throws IOException { } private DocumentParser createDocumentParser() { - return new DocumentParser(blockParserFactories, inlineParserFactory, delimiterProcessors, includeSourceSpans); + return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories, delimiterProcessors, includeSourceSpans); } private Node postProcess(Node document) { @@ -115,6 +119,7 @@ private Node postProcess(Node document) { */ public static class Builder { private final List blockParserFactories = new ArrayList<>(); + private final List inlineContentParserFactories = new ArrayList<>(); private final List delimiterProcessors = new ArrayList<>(); private final List postProcessors = new ArrayList<>(); private Set> enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes(); @@ -169,7 +174,7 @@ public Builder extensions(Iterable extensions) { * * * @param enabledBlockTypes A list of block nodes the parser will parse. - * If this list is empty, the parser will not recognize any CommonMark core features. + * If this list is empty, the parser will not recognize any CommonMark core features. * @return {@code this} */ public Builder enabledBlockTypes(Set> enabledBlockTypes) { @@ -196,7 +201,7 @@ public Builder includeSourceSpans(IncludeSourceSpans includeSourceSpans) { } /** - * Adds a custom block parser factory. + * Add a custom block parser factory. *

* Note that custom factories are applied before the built-in factories. This is so that * extensions can change how some syntax is parsed that would otherwise be handled by built-in factories. @@ -214,11 +219,28 @@ public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) { } /** - * Adds a custom delimiter processor. + * Add a factory for a custom inline content parser, for extending inline parsing or overriding built-in parsing. + *

+ * Note that parsers are triggered based on a special character as specified by + * {@link InlineContentParserFactory#getTriggerCharacters()}. It is possible to register multiple parsers for the same + * character, or even for some built-in special character such as {@code `}. The custom parsers are tried first + * in the order in which they are registered, and then the built-in ones. + */ + public Builder customInlineContentParserFactory(InlineContentParserFactory inlineContentParserFactory) { + Objects.requireNonNull(inlineContentParserFactory, "inlineContentParser must not be null"); + inlineContentParserFactories.add(inlineContentParserFactory); + return this; + } + + /** + * Add a custom delimiter processor for inline parsing. *

* Note that multiple delimiter processors with the same characters can be added, as long as they have a * different minimum length. In that case, the processor with the shortest matching length is used. Adding more * than one delimiter processor with the same character and minimum length is invalid. + *

+ * If you want more control over how parsing is done, you might want to use + * {@link #customInlineContentParserFactory} instead. * * @param delimiterProcessor a delimiter processor implementation * @return {@code this} @@ -263,15 +285,7 @@ public Builder inlineParserFactory(InlineParserFactory inlineParserFactory) { } private InlineParserFactory getInlineParserFactory() { - if (inlineParserFactory != null) { - return inlineParserFactory; - } - return new InlineParserFactory() { - @Override - public InlineParser create(InlineParserContext inlineParserContext) { - return new InlineParserImpl(inlineParserContext); - } - }; + return Objects.requireNonNullElseGet(inlineParserFactory, () -> InlineParserImpl::new); } } diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java new file mode 100644 index 000000000..bc5c9a54f --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java @@ -0,0 +1,21 @@ +package org.commonmark.parser.beta; + +/** + * Parser for a type of inline content. Registered via an {@link InlineContentParserFactory} and created by its + * {@link InlineContentParserFactory#create() create} method. The lifetime of a parser instance is tied to each inline content + * snippet that is parsed, as a new instance is created for each. + */ +public interface InlineContentParser { + + /** + * Try to parse inline content starting from the current position. Note that the character at the current position + * is one of {@link InlineContentParserFactory#getTriggerCharacters()} of the factory that created this parser. + *

+ * For a given inline content snippet that is being parsed, this method can be called multiple times: each time a + * trigger character is encountered. + * + * @param inlineParserState the current state of the inline parser + * @return the result of parsing; can indicate that this parser is not interested, or that parsing was successful + */ + ParsedInline tryParse(InlineParserState inlineParserState); +} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java new file mode 100644 index 000000000..c86f93a41 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java @@ -0,0 +1,24 @@ +package org.commonmark.parser.beta; + +import java.util.Set; + +/** + * A factory for extending inline content parsing. + *

+ * See {@link org.commonmark.parser.Parser.Builder#customInlineContentParserFactory} for how to register it. + */ +public interface InlineContentParserFactory { + + /** + * An inline content parser needs to have a special "trigger" character which activates it. When this character is + * encountered during inline parsing, {@link InlineContentParser#tryParse} is called with the current parser state. + * It can also register for more than one trigger character. + */ + Set getTriggerCharacters(); + + /** + * Create an {@link InlineContentParser} that will do the parsing. This method is called once per text snippet of inline + * content inside block structures; {@link InlineContentParser#tryParse} of the returned parser is then called each time a trigger character is encountered. + */ + InlineContentParser create(); +} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java similarity index 62% rename from commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java rename to commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java index ea8689be5..e434d45d6 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java @@ -1,13 +1,10 @@ -package org.commonmark.internal.inline; - -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +package org.commonmark.parser.beta; public interface InlineParserState { /** - * Return a scanner for the input for the current position (on the character that the inline parser registered - * interest for). + * Return a scanner for the input for the current position (on the trigger character that the inline parser was + * added for). *

* Note that this always returns the same instance, if you want to backtrack you need to use * {@link Scanner#position()} and {@link Scanner#setPosition(Position)}. diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java b/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java new file mode 100644 index 000000000..5d1402cae --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java @@ -0,0 +1,24 @@ +package org.commonmark.parser.beta; + +import org.commonmark.internal.inline.ParsedInlineImpl; +import org.commonmark.node.Node; + +import java.util.Objects; + +/** + * The result of a single inline parser. Use the static methods to create instances. + *

+ * This interface is not intended to be implemented by clients. + */ +public interface ParsedInline { + + static ParsedInline none() { + return null; + } + + static ParsedInline of(Node node, Position position) { + Objects.requireNonNull(node, "node must not be null"); + Objects.requireNonNull(position, "position must not be null"); + return new ParsedInlineImpl(node, position); + } +} diff --git a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java index 897943d66..3b6abf214 100644 --- a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java +++ b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java @@ -6,6 +6,8 @@ * Custom delimiter processor for additional delimiters besides {@code _} and {@code *}. *

* Note that implementations of this need to be thread-safe, the same instance may be used by multiple parsers. + * + * @see org.commonmark.parser.beta.InlineContentParserFactory */ public interface DelimiterProcessor { diff --git a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java new file mode 100644 index 000000000..28e9b5748 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java @@ -0,0 +1,87 @@ +package org.commonmark.parser; + +import org.commonmark.node.CustomNode; +import org.commonmark.node.Heading; +import org.commonmark.parser.beta.InlineContentParser; +import org.commonmark.parser.beta.InlineContentParserFactory; +import org.commonmark.parser.beta.InlineParserState; +import org.commonmark.parser.beta.ParsedInline; +import org.commonmark.test.Nodes; +import org.junit.Test; + +import java.util.Set; + +import static org.junit.Assert.assertEquals; + +public class InlineContentParserTest { + + @Test + public void customInlineContentParser() { + var parser = Parser.builder().customInlineContentParserFactory(new DollarInlineParser.Factory()).build(); + var doc = parser.parse("Test: $hey *there*$ $you$\n\n# Heading $heading$\n"); + var inline1 = Nodes.find(doc, DollarInline.class); + assertEquals("hey *there*", inline1.getLiteral()); + + var inline2 = (DollarInline) doc.getFirstChild().getLastChild(); + assertEquals("you", inline2.getLiteral()); + + var heading = Nodes.find(doc, Heading.class); + var inline3 = (DollarInline) heading.getLastChild(); + assertEquals("heading", inline3.getLiteral()); + + // Parser is created for each inline snippet, which is why the index resets for the second snippet. 
+ assertEquals(0, inline1.getIndex()); + assertEquals(1, inline2.getIndex()); + assertEquals(0, inline3.getIndex()); + } + + private static class DollarInline extends CustomNode { + private final String literal; + private final int index; + + public DollarInline(String literal, int index) { + this.literal = literal; + this.index = index; + } + + public String getLiteral() { + return literal; + } + + public int getIndex() { + return index; + } + } + + private static class DollarInlineParser implements InlineContentParser { + + private int index = 0; + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + var scanner = inlineParserState.scanner(); + scanner.next(); + var pos = scanner.position(); + + var end = scanner.find('$'); + if (end == -1) { + return ParsedInline.none(); + } + var content = scanner.getSource(pos, scanner.position()).getContent(); + scanner.next(); + return ParsedInline.of(new DollarInline(content, index++), scanner.position()); + } + + static class Factory implements InlineContentParserFactory { + @Override + public Set getTriggerCharacters() { + return Set.of('$'); + } + + @Override + public InlineContentParser create() { + return new DollarInlineParser(); + } + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java index b7d083df3..9fa7fb0da 100644 --- a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java +++ b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java @@ -1,6 +1,7 @@ package org.commonmark.test; import org.commonmark.internal.InlineParserImpl; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParser; import org.commonmark.parser.InlineParserContext; @@ -41,6 +42,11 @@ static class CapturingInlineParserFactory implements InlineParserFactory { 
@Override public InlineParser create(final InlineParserContext inlineParserContext) { InlineParserContext wrappedContext = new InlineParserContext() { + @Override + public List getCustomInlineContentParserFactories() { + return inlineParserContext.getCustomInlineContentParserFactories(); + } + @Override public List getCustomDelimiterProcessors() { return inlineParserContext.getCustomDelimiterProcessors();