From d8fce115d75114586f233ae9e74c9ac20753eeb1 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Tue, 16 Apr 2024 00:02:37 +1000 Subject: [PATCH 01/10] Add getTriggerCharacter to InlineContentParser, calculate inline parsers --- .../commonmark/internal/InlineParserImpl.java | 63 ++++++++++--------- .../internal/inline/AutolinkInlineParser.java | 5 ++ .../inline/BackslashInlineParser.java | 5 ++ .../inline/BackticksInlineParser.java | 5 ++ .../internal/inline/EntityInlineParser.java | 5 ++ .../internal/inline/HtmlInlineParser.java | 5 ++ .../internal/inline/InlineContentParser.java | 13 ++++ .../internal/inline/InlineParserState.java | 4 +- 8 files changed, 73 insertions(+), 32 deletions(-) diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java index 113e80db9..e5a07091b 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java @@ -16,10 +16,10 @@ public class InlineParserImpl implements InlineParser, InlineParserState { - private final BitSet specialCharacters; - private final Map delimiterProcessors; private final InlineParserContext context; private final Map> inlineParsers; + private final Map delimiterProcessors; + private final BitSet specialCharacters; private Scanner scanner; private boolean includeSourceSpans; @@ -37,45 +37,28 @@ public class InlineParserImpl implements InlineParser, InlineParserState { private Bracket lastBracket; public InlineParserImpl(InlineParserContext inlineParserContext) { - this.delimiterProcessors = calculateDelimiterProcessors(inlineParserContext.getCustomDelimiterProcessors()); - this.context = inlineParserContext; - this.inlineParsers = new HashMap<>(); - this.inlineParsers.put('\\', Collections.singletonList(new BackslashInlineParser())); - this.inlineParsers.put('`', Collections.singletonList(new BackticksInlineParser())); - 
this.inlineParsers.put('&', Collections.singletonList(new EntityInlineParser())); - this.inlineParsers.put('<', Arrays.asList(new AutolinkInlineParser(), new HtmlInlineParser())); - + this.inlineParsers = calculateInlineContentParsers(); + this.delimiterProcessors = calculateDelimiterProcessors(inlineParserContext.getCustomDelimiterProcessors()); this.specialCharacters = calculateSpecialCharacters(this.delimiterProcessors.keySet(), inlineParsers.keySet()); } - public static BitSet calculateSpecialCharacters(Set delimiterCharacters, Set characters) { - BitSet bitSet = new BitSet(); - for (Character c : delimiterCharacters) { - bitSet.set(c); + private static Map> calculateInlineContentParsers() { + var map = new HashMap>(); + for (var parser : List.of(new BackslashInlineParser(), new BackticksInlineParser(), new EntityInlineParser(), + new AutolinkInlineParser(), new HtmlInlineParser())) { + map.computeIfAbsent(parser.getTriggerCharacter(), k -> new ArrayList<>()).add(parser); } - for (Character c : characters) { - bitSet.set(c); - } - bitSet.set('['); - bitSet.set(']'); - bitSet.set('!'); - bitSet.set('\n'); - return bitSet; + return map; } - public static Map calculateDelimiterProcessors(List delimiterProcessors) { - Map map = new HashMap<>(); - addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); + private static Map calculateDelimiterProcessors(List delimiterProcessors) { + var map = new HashMap(); + addDelimiterProcessors(List.of(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); addDelimiterProcessors(delimiterProcessors, map); return map; } - @Override - public Scanner scanner() { - return scanner; - } - private static void addDelimiterProcessors(Iterable delimiterProcessors, Map map) { for (DelimiterProcessor delimiterProcessor : delimiterProcessors) { char opening = delimiterProcessor.getOpeningCharacter(); @@ -109,6 +92,26 @@ private static void 
addDelimiterProcessorForChar(char delimiterChar, DelimiterPr } } + private static BitSet calculateSpecialCharacters(Set delimiterCharacters, Set characters) { + BitSet bitSet = new BitSet(); + for (Character c : delimiterCharacters) { + bitSet.set(c); + } + for (Character c : characters) { + bitSet.set(c); + } + bitSet.set('['); + bitSet.set(']'); + bitSet.set('!'); + bitSet.set('\n'); + return bitSet; + } + + @Override + public Scanner scanner() { + return scanner; + } + /** * Parse content in block into inline children, appending them to the block node. */ diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java index 36c43e196..1d27f43c9 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java @@ -19,6 +19,11 @@ public class AutolinkInlineParser implements InlineContentParser { private static final Pattern EMAIL = Pattern .compile("^([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$"); + @Override + public char getTriggerCharacter() { + return '<'; + } + @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java index 02c136951..768875174 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java @@ -15,6 +15,11 @@ public class BackslashInlineParser implements InlineContentParser { private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); + 
@Override + public char getTriggerCharacter() { + return '\\'; + } + @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java index bef8e1f99..1c12b2fd4 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java @@ -12,6 +12,11 @@ */ public class BackticksInlineParser implements InlineContentParser { + @Override + public char getTriggerCharacter() { + return '`'; + } + @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java index 2b7d296fb..4dfd94e9f 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java @@ -16,6 +16,11 @@ public class EntityInlineParser implements InlineContentParser { private static final AsciiMatcher entityStart = AsciiMatcher.builder().range('A', 'Z').range('a', 'z').build(); private static final AsciiMatcher entityContinue = entityStart.newBuilder().range('0', '9').build(); + @Override + public char getTriggerCharacter() { + return '&'; + } + @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java index 6dc525cb9..f776691df 100644 --- 
a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java @@ -26,6 +26,11 @@ public class HtmlInlineParser implements InlineContentParser { .c('"').c('\'').c('=').c('<').c('>').c('`') .build(); + @Override + public char getTriggerCharacter() { + return '<'; + } + @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java index 755ee3135..2dcaf1653 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java @@ -2,5 +2,18 @@ public interface InlineContentParser { + /** + * An inline content parser needs to have a special "trigger" character which activates it. If this character is + * encountered during inline parsing, {@link #tryParse} is called with the current parser state. + */ + char getTriggerCharacter(); + + /** + * Try to parse the inline content. Note that the character at the current position is the + * {@link #getTriggerCharacter()}. 
+ * + * @param inlineParserState the current state of the inline parser + * @return the result of parsing; can indicate that this parser is not interested, or that parsing was successful + */ ParsedInline tryParse(InlineParserState inlineParserState); } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java b/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java index ea8689be5..ba7369617 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java @@ -6,8 +6,8 @@ public interface InlineParserState { /** - * Return a scanner for the input for the current position (on the character that the inline parser registered - * interest for). + * Return a scanner for the input for the current position (on the trigger character that the inline parser was + * added for). *
<p>
* Note that this always returns the same instance, if you want to backtrack you need to use * {@link Scanner#position()} and {@link Scanner#setPosition(Position)}. From 1c0259d93d14ab3a7f12a45db78d19d04c5e011a Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Tue, 16 Apr 2024 00:14:46 +1000 Subject: [PATCH 02/10] Cleanups --- .../commonmark/internal/InlineParserImpl.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java index e5a07091b..364484a7c 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java @@ -120,14 +120,13 @@ public void parse(SourceLines lines, Node block) { reset(lines); while (true) { - List nodes = parseInline(); - if (nodes != null) { - for (Node node : nodes) { - block.appendChild(node); - } - } else { + var nodes = parseInline(); + if (nodes == null) { break; } + for (Node node : nodes) { + block.appendChild(node); + } } processDelimiters(null); @@ -158,20 +157,20 @@ private List parseInline() { switch (c) { case '[': - return Collections.singletonList(parseOpenBracket()); + return List.of(parseOpenBracket()); case '!': - return Collections.singletonList(parseBang()); + return List.of(parseBang()); case ']': - return Collections.singletonList(parseCloseBracket()); + return List.of(parseCloseBracket()); case '\n': - return Collections.singletonList(parseLineBreak()); + return List.of(parseLineBreak()); case Scanner.END: return null; } // No inline parser, delimiter or other special handling. 
if (!specialCharacters.get(c)) { - return Collections.singletonList(parseText()); + return List.of(parseText()); } List inlineParsers = this.inlineParsers.get(c); @@ -186,7 +185,7 @@ private List parseInline() { if (includeSourceSpans && node.getSourceSpans().isEmpty()) { node.setSourceSpans(scanner.getSource(position, scanner.position()).getSourceSpans()); } - return Collections.singletonList(node); + return List.of(node); } else { // Reset position scanner.setPosition(position); @@ -203,7 +202,7 @@ private List parseInline() { } // If we get here, even for a special/delimiter character, we will just treat it as text. - return Collections.singletonList(parseText()); + return List.of(parseText()); } /** From 0dc0c2ececf045241dd5b79f28527124df4aef47 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Tue, 16 Apr 2024 22:23:58 +1000 Subject: [PATCH 03/10] Add customInlineContentParser and use in inline parsing --- .../commonmark/internal/DocumentParser.java | 8 ++- .../internal/InlineParserContextImpl.java | 12 +++- .../commonmark/internal/InlineParserImpl.java | 14 +++-- .../parser/InlineParserContext.java | 6 ++ .../java/org/commonmark/parser/Parser.java | 41 +++++++++----- .../parser/CustomInlineContentParserTest.java | 55 +++++++++++++++++++ .../test/InlineParserContextTest.java | 6 ++ 7 files changed, 119 insertions(+), 23 deletions(-) create mode 100644 commonmark/src/test/java/org/commonmark/parser/CustomInlineContentParserTest.java diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index 2cc37e306..89bedf8cb 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -1,5 +1,6 @@ package org.commonmark.internal; +import org.commonmark.internal.inline.InlineContentParser; import org.commonmark.internal.util.Parsing; import org.commonmark.node.*; import 
org.commonmark.parser.*; @@ -66,6 +67,7 @@ public class DocumentParser implements ParserState { private final List blockParserFactories; private final InlineParserFactory inlineParserFactory; + private final List inlineContentParsers; private final List delimiterProcessors; private final IncludeSourceSpans includeSourceSpans; private final DocumentBlockParser documentBlockParser; @@ -75,9 +77,11 @@ public class DocumentParser implements ParserState { private final List allBlockParsers = new ArrayList<>(); public DocumentParser(List blockParserFactories, InlineParserFactory inlineParserFactory, - List delimiterProcessors, IncludeSourceSpans includeSourceSpans) { + List inlineContentParsers, List delimiterProcessors, + IncludeSourceSpans includeSourceSpans) { this.blockParserFactories = blockParserFactories; this.inlineParserFactory = inlineParserFactory; + this.inlineContentParsers = inlineContentParsers; this.delimiterProcessors = delimiterProcessors; this.includeSourceSpans = includeSourceSpans; @@ -477,7 +481,7 @@ private void addDefinitionsFrom(ParagraphParser paragraphParser) { * Walk through a block & children recursively, parsing string content into inline content where appropriate. 
*/ private void processInlines() { - InlineParserContextImpl context = new InlineParserContextImpl(delimiterProcessors, definitions); + InlineParserContextImpl context = new InlineParserContextImpl(inlineContentParsers, delimiterProcessors, definitions); InlineParser inlineParser = inlineParserFactory.create(context); for (BlockParser blockParser : allBlockParsers) { diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java index f485614d5..7354d9b88 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java @@ -1,23 +1,31 @@ package org.commonmark.internal; +import org.commonmark.internal.inline.InlineContentParser; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.delimiter.DelimiterProcessor; import java.util.List; -import java.util.Map; public class InlineParserContextImpl implements InlineParserContext { + private final List inlineContentParsers; private final List delimiterProcessors; private final LinkReferenceDefinitions linkReferenceDefinitions; - public InlineParserContextImpl(List delimiterProcessors, + public InlineParserContextImpl(List inlineContentParsers, + List delimiterProcessors, LinkReferenceDefinitions linkReferenceDefinitions) { + this.inlineContentParsers = inlineContentParsers; this.delimiterProcessors = delimiterProcessors; this.linkReferenceDefinitions = linkReferenceDefinitions; } + @Override + public List getCustomInlineContentParsers() { + return inlineContentParsers; + } + @Override public List getCustomDelimiterProcessors() { return delimiterProcessors; diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java index 
364484a7c..82caed36c 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java @@ -36,15 +36,19 @@ public class InlineParserImpl implements InlineParser, InlineParserState { */ private Bracket lastBracket; - public InlineParserImpl(InlineParserContext inlineParserContext) { - this.context = inlineParserContext; - this.inlineParsers = calculateInlineContentParsers(); - this.delimiterProcessors = calculateDelimiterProcessors(inlineParserContext.getCustomDelimiterProcessors()); + public InlineParserImpl(InlineParserContext context) { + this.context = context; + this.inlineParsers = calculateInlineContentParsers(context.getCustomInlineContentParsers()); + this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors()); this.specialCharacters = calculateSpecialCharacters(this.delimiterProcessors.keySet(), inlineParsers.keySet()); } - private static Map> calculateInlineContentParsers() { + private static Map> calculateInlineContentParsers(List inlineContentParsers) { var map = new HashMap>(); + // Custom parsers can override built-in parsers if they want, so make sure they are tried first + for (var parser : inlineContentParsers) { + map.computeIfAbsent(parser.getTriggerCharacter(), k -> new ArrayList<>()).add(parser); + } for (var parser : List.of(new BackslashInlineParser(), new BackticksInlineParser(), new EntityInlineParser(), new AutolinkInlineParser(), new HtmlInlineParser())) { map.computeIfAbsent(parser.getTriggerCharacter(), k -> new ArrayList<>()).add(parser); diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java index dae96e2c8..7c41f8c9a 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java @@ -1,5 +1,6 @@ package 
org.commonmark.parser; +import org.commonmark.internal.inline.InlineContentParser; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -10,6 +11,11 @@ */ public interface InlineParserContext { + /** + * @return custom inline content parsers that have been configured with {@link Parser.Builder#customInlineContentParser(InlineContentParser)} + */ + List getCustomInlineContentParsers(); + /** * @return custom delimiter processors that have been configured with {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)} */ diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index 89cdd584c..cb38c5b0b 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -5,6 +5,7 @@ import org.commonmark.internal.InlineParserContextImpl; import org.commonmark.internal.InlineParserImpl; import org.commonmark.internal.LinkReferenceDefinitions; +import org.commonmark.internal.inline.InlineContentParser; import org.commonmark.node.*; import org.commonmark.parser.block.BlockParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -13,6 +14,7 @@ import java.io.Reader; import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Set; @@ -28,6 +30,7 @@ public class Parser { private final List blockParserFactories; + private final List inlineContentParsers; private final List delimiterProcessors; private final InlineParserFactory inlineParserFactory; private final List postProcessors; @@ -37,12 +40,13 @@ private Parser(Builder builder) { this.blockParserFactories = DocumentParser.calculateBlockParserFactories(builder.blockParserFactories, builder.enabledBlockTypes); this.inlineParserFactory = builder.getInlineParserFactory(); this.postProcessors = builder.postProcessors; + this.inlineContentParsers = 
builder.inlineContentParsers; this.delimiterProcessors = builder.delimiterProcessors; this.includeSourceSpans = builder.includeSourceSpans; // Try to construct an inline parser. Invalid configuration might result in an exception, which we want to // detect as soon as possible. - this.inlineParserFactory.create(new InlineParserContextImpl(delimiterProcessors, new LinkReferenceDefinitions())); + this.inlineParserFactory.create(new InlineParserContextImpl(inlineContentParsers, delimiterProcessors, new LinkReferenceDefinitions())); } /** @@ -100,7 +104,7 @@ public Node parseReader(Reader input) throws IOException { } private DocumentParser createDocumentParser() { - return new DocumentParser(blockParserFactories, inlineParserFactory, delimiterProcessors, includeSourceSpans); + return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParsers, delimiterProcessors, includeSourceSpans); } private Node postProcess(Node document) { @@ -115,6 +119,7 @@ private Node postProcess(Node document) { */ public static class Builder { private final List blockParserFactories = new ArrayList<>(); + private final List inlineContentParsers = new ArrayList<>(); private final List delimiterProcessors = new ArrayList<>(); private final List postProcessors = new ArrayList<>(); private Set> enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes(); @@ -169,7 +174,7 @@ public Builder extensions(Iterable extensions) { * * * @param enabledBlockTypes A list of block nodes the parser will parse. - * If this list is empty, the parser will not recognize any CommonMark core features. + * If this list is empty, the parser will not recognize any CommonMark core features. * @return {@code this} */ public Builder enabledBlockTypes(Set> enabledBlockTypes) { @@ -196,7 +201,7 @@ public Builder includeSourceSpans(IncludeSourceSpans includeSourceSpans) { } /** - * Adds a custom block parser factory. + * Add a custom block parser factory. *
<p>
* Note that custom factories are applied before the built-in factories. This is so that * extensions can change how some syntax is parsed that would otherwise be handled by built-in factories. @@ -214,7 +219,23 @@ public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) { } /** - * Adds a custom delimiter processor. + * Add a custom inline content parser, for additional inline parsing or overriding built-in parsing. + *
<p>
+ * Note that parsers are triggered based on a special character as specified by + * {@link InlineContentParser#getTriggerCharacter()}. It is possible to register multiple parsers for the same + * character, or even for some built-in special character such as {@code `}. + * + * @param inlineContentParser + * @return + */ + public Builder customInlineContentParser(InlineContentParser inlineContentParser) { + Objects.requireNonNull(inlineContentParser, "inlineContentParser must not be null"); + inlineContentParsers.add(inlineContentParser); + return this; + } + + /** + * Add a custom delimiter processor. *
<p>
* Note that multiple delimiter processors with the same characters can be added, as long as they have a * different minimum length. In that case, the processor with the shortest matching length is used. Adding more @@ -263,15 +284,7 @@ public Builder inlineParserFactory(InlineParserFactory inlineParserFactory) { } private InlineParserFactory getInlineParserFactory() { - if (inlineParserFactory != null) { - return inlineParserFactory; - } - return new InlineParserFactory() { - @Override - public InlineParser create(InlineParserContext inlineParserContext) { - return new InlineParserImpl(inlineParserContext); - } - }; + return Objects.requireNonNullElseGet(inlineParserFactory, () -> InlineParserImpl::new); } } diff --git a/commonmark/src/test/java/org/commonmark/parser/CustomInlineContentParserTest.java b/commonmark/src/test/java/org/commonmark/parser/CustomInlineContentParserTest.java new file mode 100644 index 000000000..a88053912 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/parser/CustomInlineContentParserTest.java @@ -0,0 +1,55 @@ +package org.commonmark.parser; + +import org.commonmark.internal.inline.InlineContentParser; +import org.commonmark.internal.inline.InlineParserState; +import org.commonmark.internal.inline.ParsedInline; +import org.commonmark.node.CustomNode; +import org.commonmark.test.Nodes; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class CustomInlineContentParserTest { + + @Test + public void customInlineContentParser() { + var parser = Parser.builder().customInlineContentParser(new DollarInlineContentParser()).build(); + var doc = parser.parse("Test: $hey *there*$"); + var dollarInline = Nodes.find(doc, DollarInline.class); + assertEquals("hey *there*", dollarInline.getLiteral()); + } + + private static class DollarInline extends CustomNode { + private final String literal; + + public DollarInline(String literal) { + this.literal = literal; + } + + public String getLiteral() { + return 
literal; + } + } + + private static class DollarInlineContentParser implements InlineContentParser { + @Override + public char getTriggerCharacter() { + return '$'; + } + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + var scanner = inlineParserState.scanner(); + scanner.next(); + var pos = scanner.position(); + + var end = scanner.find('$'); + if (end == -1) { + return ParsedInline.none(); + } + var content = scanner.getSource(pos, scanner.position()).getContent(); + scanner.next(); + return ParsedInline.of(new DollarInline(content), scanner.position()); + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java index b7d083df3..7fd875703 100644 --- a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java +++ b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java @@ -1,6 +1,7 @@ package org.commonmark.test; import org.commonmark.internal.InlineParserImpl; +import org.commonmark.internal.inline.InlineContentParser; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParser; import org.commonmark.parser.InlineParserContext; @@ -41,6 +42,11 @@ static class CapturingInlineParserFactory implements InlineParserFactory { @Override public InlineParser create(final InlineParserContext inlineParserContext) { InlineParserContext wrappedContext = new InlineParserContext() { + @Override + public List getCustomInlineContentParsers() { + return inlineParserContext.getCustomInlineContentParsers(); + } + @Override public List getCustomDelimiterProcessors() { return inlineParserContext.getCustomDelimiterProcessors(); From 07735411f6654700c46c1b6f6d46176f0fe66dc0 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Mon, 22 Apr 2024 09:36:38 +1000 Subject: [PATCH 04/10] Add factory so that inline parsers can keep state --- 
.../commonmark/internal/DocumentParser.java | 10 +-- .../internal/InlineParserContextImpl.java | 12 +-- .../commonmark/internal/InlineParserImpl.java | 41 +++++---- .../internal/inline/AutolinkInlineParser.java | 17 ++-- .../inline/BackslashInlineParser.java | 17 ++-- .../inline/BackticksInlineParser.java | 17 ++-- .../internal/inline/EntityInlineParser.java | 22 +++-- .../internal/inline/HtmlInlineParser.java | 20 +++-- .../internal/inline/InlineContentParser.java | 18 ++-- .../inline/InlineContentParserFactory.java | 16 ++++ .../parser/InlineParserContext.java | 6 +- .../parser/InlineParserFactory.java | 4 + .../java/org/commonmark/parser/Parser.java | 22 ++--- .../parser/CustomInlineContentParserTest.java | 55 ------------ .../parser/InlineContentParserTest.java | 85 +++++++++++++++++++ .../test/InlineParserContextTest.java | 6 +- 16 files changed, 233 insertions(+), 135 deletions(-) create mode 100644 commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java delete mode 100644 commonmark/src/test/java/org/commonmark/parser/CustomInlineContentParserTest.java create mode 100644 commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index 89bedf8cb..afb6ed9dd 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -1,6 +1,6 @@ package org.commonmark.internal; -import org.commonmark.internal.inline.InlineContentParser; +import org.commonmark.internal.inline.InlineContentParserFactory; import org.commonmark.internal.util.Parsing; import org.commonmark.node.*; import org.commonmark.parser.*; @@ -67,7 +67,7 @@ public class DocumentParser implements ParserState { private final List blockParserFactories; private final InlineParserFactory inlineParserFactory; - private 
final List inlineContentParsers; + private final List inlineContentParserFactories; private final List delimiterProcessors; private final IncludeSourceSpans includeSourceSpans; private final DocumentBlockParser documentBlockParser; @@ -77,11 +77,11 @@ public class DocumentParser implements ParserState { private final List allBlockParsers = new ArrayList<>(); public DocumentParser(List blockParserFactories, InlineParserFactory inlineParserFactory, - List inlineContentParsers, List delimiterProcessors, + List inlineContentParserFactories, List delimiterProcessors, IncludeSourceSpans includeSourceSpans) { this.blockParserFactories = blockParserFactories; this.inlineParserFactory = inlineParserFactory; - this.inlineContentParsers = inlineContentParsers; + this.inlineContentParserFactories = inlineContentParserFactories; this.delimiterProcessors = delimiterProcessors; this.includeSourceSpans = includeSourceSpans; @@ -481,7 +481,7 @@ private void addDefinitionsFrom(ParagraphParser paragraphParser) { * Walk through a block & children recursively, parsing string content into inline content where appropriate. 
*/ private void processInlines() { - InlineParserContextImpl context = new InlineParserContextImpl(inlineContentParsers, delimiterProcessors, definitions); + InlineParserContextImpl context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, definitions); InlineParser inlineParser = inlineParserFactory.create(context); for (BlockParser blockParser : allBlockParsers) { diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java index 7354d9b88..c8d927246 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java @@ -1,6 +1,6 @@ package org.commonmark.internal; -import org.commonmark.internal.inline.InlineContentParser; +import org.commonmark.internal.inline.InlineContentParserFactory; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -9,21 +9,21 @@ public class InlineParserContextImpl implements InlineParserContext { - private final List inlineContentParsers; + private final List inlineContentParserFactories; private final List delimiterProcessors; private final LinkReferenceDefinitions linkReferenceDefinitions; - public InlineParserContextImpl(List inlineContentParsers, + public InlineParserContextImpl(List inlineContentParserFactories, List delimiterProcessors, LinkReferenceDefinitions linkReferenceDefinitions) { - this.inlineContentParsers = inlineContentParsers; + this.inlineContentParserFactories = inlineContentParserFactories; this.delimiterProcessors = delimiterProcessors; this.linkReferenceDefinitions = linkReferenceDefinitions; } @Override - public List getCustomInlineContentParsers() { - return inlineContentParsers; + public List getCustomInlineContentParserFactories() { + return 
inlineContentParserFactories; } @Override diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java index 82caed36c..3d26993db 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java @@ -17,10 +17,11 @@ public class InlineParserImpl implements InlineParser, InlineParserState { private final InlineParserContext context; - private final Map> inlineParsers; + private final List inlineContentParserFactories; private final Map delimiterProcessors; private final BitSet specialCharacters; + private Map> inlineParsers; private Scanner scanner; private boolean includeSourceSpans; private int trailingSpaces; @@ -38,22 +39,20 @@ public class InlineParserImpl implements InlineParser, InlineParserState { public InlineParserImpl(InlineParserContext context) { this.context = context; - this.inlineParsers = calculateInlineContentParsers(context.getCustomInlineContentParsers()); + this.inlineContentParserFactories = calculateInlineContentParserFactories(context.getCustomInlineContentParserFactories()); this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors()); - this.specialCharacters = calculateSpecialCharacters(this.delimiterProcessors.keySet(), inlineParsers.keySet()); + this.specialCharacters = calculateSpecialCharacters(this.delimiterProcessors.keySet(), this.inlineContentParserFactories); } - private static Map> calculateInlineContentParsers(List inlineContentParsers) { - var map = new HashMap>(); + private List calculateInlineContentParserFactories(List customFactories) { // Custom parsers can override built-in parsers if they want, so make sure they are tried first - for (var parser : inlineContentParsers) { - map.computeIfAbsent(parser.getTriggerCharacter(), k -> new ArrayList<>()).add(parser); - } - for (var parser : List.of(new 
BackslashInlineParser(), new BackticksInlineParser(), new EntityInlineParser(), - new AutolinkInlineParser(), new HtmlInlineParser())) { - map.computeIfAbsent(parser.getTriggerCharacter(), k -> new ArrayList<>()).add(parser); - } - return map; + var list = new ArrayList<>(customFactories); + list.add(new BackslashInlineParser.Factory()); + list.add(new BackticksInlineParser.Factory()); + list.add(new EntityInlineParser.Factory()); + list.add(new AutolinkInlineParser.Factory()); + list.add(new HtmlInlineParser.Factory()); + return list; } private static Map calculateDelimiterProcessors(List delimiterProcessors) { @@ -96,13 +95,14 @@ private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterPr } } - private static BitSet calculateSpecialCharacters(Set delimiterCharacters, Set characters) { + private static BitSet calculateSpecialCharacters(Set delimiterCharacters, + List inlineContentParserFactories) { BitSet bitSet = new BitSet(); for (Character c : delimiterCharacters) { bitSet.set(c); } - for (Character c : characters) { - bitSet.set(c); + for (var factory : inlineContentParserFactories) { + bitSet.set(factory.getTriggerCharacter()); } bitSet.set('['); bitSet.set(']'); @@ -111,6 +111,14 @@ private static BitSet calculateSpecialCharacters(Set delimiterCharact return bitSet; } + private Map> createInlineContentParsers() { + var map = new HashMap>(); + for (var factory : inlineContentParserFactories) { + map.computeIfAbsent(factory.getTriggerCharacter(), k -> new ArrayList<>()).add(factory.create()); + } + return map; + } + @Override public Scanner scanner() { return scanner; @@ -143,6 +151,7 @@ void reset(SourceLines lines) { this.trailingSpaces = 0; this.lastDelimiter = null; this.lastBracket = null; + this.inlineParsers = createInlineContentParsers(); } private Text text(SourceLines sourceLines) { diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java 
b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java index 1d27f43c9..55a0e46a1 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java @@ -19,11 +19,6 @@ public class AutolinkInlineParser implements InlineContentParser { private static final Pattern EMAIL = Pattern .compile("^([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)$"); - @Override - public char getTriggerCharacter() { - return '<'; - } - @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); @@ -51,4 +46,16 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { } return ParsedInline.none(); } + + public static class Factory implements InlineContentParserFactory { + @Override + public char getTriggerCharacter() { + return '<'; + } + + @Override + public InlineContentParser create() { + return new AutolinkInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java index 768875174..f2133baaa 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java @@ -15,11 +15,6 @@ public class BackslashInlineParser implements InlineContentParser { private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); - @Override - public char getTriggerCharacter() { - return '\\'; - } - @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); @@ -37,4 +32,16 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { return 
ParsedInline.of(new Text("\\"), scanner.position()); } } + + public static class Factory implements InlineContentParserFactory { + @Override + public char getTriggerCharacter() { + return '\\'; + } + + @Override + public InlineContentParser create() { + return new BackslashInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java index 1c12b2fd4..80286f578 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java @@ -12,11 +12,6 @@ */ public class BackticksInlineParser implements InlineContentParser { - @Override - public char getTriggerCharacter() { - return '`'; - } - @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); @@ -52,4 +47,16 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { Text text = new Text(source.getContent()); return ParsedInline.of(text, afterOpening); } + + public static class Factory implements InlineContentParserFactory { + @Override + public char getTriggerCharacter() { + return '`'; + } + + @Override + public InlineContentParser create() { + return new BackticksInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java index 4dfd94e9f..8e45b26ce 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java @@ -1,13 +1,13 @@ package org.commonmark.internal.inline; -import org.commonmark.text.AsciiMatcher; import org.commonmark.internal.util.Html5Entities; import org.commonmark.node.Text; import 
org.commonmark.parser.beta.Position; import org.commonmark.parser.beta.Scanner; +import org.commonmark.text.AsciiMatcher; /** - * Attempts to parse a HTML entity or numeric character reference. + * Attempts to parse an HTML entity or numeric character reference. */ public class EntityInlineParser implements InlineContentParser { @@ -16,11 +16,6 @@ public class EntityInlineParser implements InlineContentParser { private static final AsciiMatcher entityStart = AsciiMatcher.builder().range('A', 'Z').range('a', 'z').build(); private static final AsciiMatcher entityContinue = entityStart.newBuilder().range('0', '9').build(); - @Override - public char getTriggerCharacter() { - return '&'; - } - @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); @@ -57,4 +52,17 @@ private ParsedInline entity(Scanner scanner, Position start) { String text = scanner.getSource(start, scanner.position()).getContent(); return ParsedInline.of(new Text(Html5Entities.entityToString(text)), scanner.position()); } + + public static class Factory implements InlineContentParserFactory { + + @Override + public char getTriggerCharacter() { + return '&'; + } + + @Override + public InlineContentParser create() { + return new EntityInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java index f776691df..691946483 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java @@ -1,9 +1,9 @@ package org.commonmark.internal.inline; -import org.commonmark.text.AsciiMatcher; import org.commonmark.node.HtmlInline; import org.commonmark.parser.beta.Position; import org.commonmark.parser.beta.Scanner; +import org.commonmark.text.AsciiMatcher; /** * Attempt to parse inline HTML. 
@@ -26,11 +26,6 @@ public class HtmlInlineParser implements InlineContentParser { .c('"').c('\'').c('=').c('<').c('>').c('`') .build(); - @Override - public char getTriggerCharacter() { - return '<'; - } - @Override public ParsedInline tryParse(InlineParserState inlineParserState) { Scanner scanner = inlineParserState.scanner(); @@ -205,4 +200,17 @@ private static boolean tryDeclaration(Scanner scanner) { } return false; } + + public static class Factory implements InlineContentParserFactory { + + @Override + public char getTriggerCharacter() { + return '<'; + } + + @Override + public InlineContentParser create() { + return new HtmlInlineParser(); + } + } } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java index 2dcaf1653..e0ab413f3 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java @@ -1,16 +1,18 @@ package org.commonmark.internal.inline; +/** + * Parser for a type of inline content. Registered via a {@link InlineContentParserFactory} and created by its + * {@link InlineContentParserFactory#create() create} method. The lifetime of this is tied to each inline content + * snippet that is parsed, as a new instance is created for each. + */ public interface InlineContentParser { /** - * An inline content parser needs to have a special "trigger" character which activates it. If this character is - * encountered during inline parsing, {@link #tryParse} is called with the current parser state. - */ - char getTriggerCharacter(); - - /** - * Try to parse the inline content. Note that the character at the current position is the - * {@link #getTriggerCharacter()}. + * Try to parse inline content starting from the current position. 
Note that the character at the current position + * is the {@link InlineContentParserFactory#getTriggerCharacter()} of the factory that created this parser. + *

+ * For a given inline content snippet that is being parsed, this method can be called multiple times: each time a + * trigger character is encountered. * * @param inlineParserState the current state of the inline parser * @return the result of parsing; can indicate that this parser is not interested, or that parsing was successful diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java new file mode 100644 index 000000000..b8d83be66 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java @@ -0,0 +1,16 @@ +package org.commonmark.internal.inline; + +public interface InlineContentParserFactory { + + /** + * An inline content parser needs to have a special "trigger" character which activates it. When this character is + * encountered during inline parsing, {@link InlineContentParser#tryParse} is called with the current parser state. + */ + char getTriggerCharacter(); + + /** + * Create an {@link InlineContentParser} that will do the parsing. Create is called once per text snippet of inline + * content inside block structures, and then called each time a trigger character is encountered. 
+ */ + InlineContentParser create(); +} diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java index 7c41f8c9a..1c2594033 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java @@ -1,6 +1,6 @@ package org.commonmark.parser; -import org.commonmark.internal.inline.InlineContentParser; +import org.commonmark.internal.inline.InlineContentParserFactory; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -12,9 +12,9 @@ public interface InlineParserContext { /** - * @return custom inline content parsers that have been configured with {@link Parser.Builder#customInlineContentParser(InlineContentParser)} + * @return custom inline content parsers that have been configured with {@link Parser.Builder#customInlineContentParser(InlineContentParserFactory)} */ - List getCustomInlineContentParsers(); + List getCustomInlineContentParserFactories(); /** * @return custom delimiter processors that have been configured with {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)} diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java index 34c384a8a..c1640e9d8 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserFactory.java @@ -4,5 +4,9 @@ * Factory for custom inline parser. */ public interface InlineParserFactory { + + /** + * Create an {@link InlineParser} to use for parsing inlines. This is called once per parsed document. 
+ */ InlineParser create(InlineParserContext inlineParserContext); } diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index cb38c5b0b..75547c422 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -5,7 +5,7 @@ import org.commonmark.internal.InlineParserContextImpl; import org.commonmark.internal.InlineParserImpl; import org.commonmark.internal.LinkReferenceDefinitions; -import org.commonmark.internal.inline.InlineContentParser; +import org.commonmark.internal.inline.InlineContentParserFactory; import org.commonmark.node.*; import org.commonmark.parser.block.BlockParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -30,7 +30,7 @@ public class Parser { private final List blockParserFactories; - private final List inlineContentParsers; + private final List inlineContentParserFactories; private final List delimiterProcessors; private final InlineParserFactory inlineParserFactory; private final List postProcessors; @@ -40,13 +40,13 @@ private Parser(Builder builder) { this.blockParserFactories = DocumentParser.calculateBlockParserFactories(builder.blockParserFactories, builder.enabledBlockTypes); this.inlineParserFactory = builder.getInlineParserFactory(); this.postProcessors = builder.postProcessors; - this.inlineContentParsers = builder.inlineContentParsers; + this.inlineContentParserFactories = builder.inlineContentParserFactories; this.delimiterProcessors = builder.delimiterProcessors; this.includeSourceSpans = builder.includeSourceSpans; // Try to construct an inline parser. Invalid configuration might result in an exception, which we want to // detect as soon as possible. 
- this.inlineParserFactory.create(new InlineParserContextImpl(inlineContentParsers, delimiterProcessors, new LinkReferenceDefinitions())); + this.inlineParserFactory.create(new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, new LinkReferenceDefinitions())); } /** @@ -104,7 +104,7 @@ public Node parseReader(Reader input) throws IOException { } private DocumentParser createDocumentParser() { - return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParsers, delimiterProcessors, includeSourceSpans); + return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories, delimiterProcessors, includeSourceSpans); } private Node postProcess(Node document) { @@ -119,7 +119,7 @@ private Node postProcess(Node document) { */ public static class Builder { private final List blockParserFactories = new ArrayList<>(); - private final List inlineContentParsers = new ArrayList<>(); + private final List inlineContentParserFactories = new ArrayList<>(); private final List delimiterProcessors = new ArrayList<>(); private final List postProcessors = new ArrayList<>(); private Set> enabledBlockTypes = DocumentParser.getDefaultBlockParserTypes(); @@ -222,15 +222,15 @@ public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) { * Add a custom inline content parser, for additional inline parsing or overriding built-in parsing. *

* Note that parsers are triggered based on a special character as specified by - * {@link InlineContentParser#getTriggerCharacter()}. It is possible to register multiple parsers for the same + * {@link InlineContentParserFactory#getTriggerCharacter()}. It is possible to register multiple parsers for the same * character, or even for some built-in special character such as {@code `}. * - * @param inlineContentParser + * @param inlineContentParserFactory * @return */ - public Builder customInlineContentParser(InlineContentParser inlineContentParser) { - Objects.requireNonNull(inlineContentParser, "inlineContentParser must not be null"); - inlineContentParsers.add(inlineContentParser); + public Builder customInlineContentParser(InlineContentParserFactory inlineContentParserFactory) { + Objects.requireNonNull(inlineContentParserFactory, "inlineContentParser must not be null"); + inlineContentParserFactories.add(inlineContentParserFactory); return this; } diff --git a/commonmark/src/test/java/org/commonmark/parser/CustomInlineContentParserTest.java b/commonmark/src/test/java/org/commonmark/parser/CustomInlineContentParserTest.java deleted file mode 100644 index a88053912..000000000 --- a/commonmark/src/test/java/org/commonmark/parser/CustomInlineContentParserTest.java +++ /dev/null @@ -1,55 +0,0 @@ -package org.commonmark.parser; - -import org.commonmark.internal.inline.InlineContentParser; -import org.commonmark.internal.inline.InlineParserState; -import org.commonmark.internal.inline.ParsedInline; -import org.commonmark.node.CustomNode; -import org.commonmark.test.Nodes; -import org.junit.Test; - -import static org.junit.Assert.assertEquals; - -public class CustomInlineContentParserTest { - - @Test - public void customInlineContentParser() { - var parser = Parser.builder().customInlineContentParser(new DollarInlineContentParser()).build(); - var doc = parser.parse("Test: $hey *there*$"); - var dollarInline = Nodes.find(doc, DollarInline.class); - assertEquals("hey 
*there*", dollarInline.getLiteral()); - } - - private static class DollarInline extends CustomNode { - private final String literal; - - public DollarInline(String literal) { - this.literal = literal; - } - - public String getLiteral() { - return literal; - } - } - - private static class DollarInlineContentParser implements InlineContentParser { - @Override - public char getTriggerCharacter() { - return '$'; - } - - @Override - public ParsedInline tryParse(InlineParserState inlineParserState) { - var scanner = inlineParserState.scanner(); - scanner.next(); - var pos = scanner.position(); - - var end = scanner.find('$'); - if (end == -1) { - return ParsedInline.none(); - } - var content = scanner.getSource(pos, scanner.position()).getContent(); - scanner.next(); - return ParsedInline.of(new DollarInline(content), scanner.position()); - } - } -} diff --git a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java new file mode 100644 index 000000000..083914676 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java @@ -0,0 +1,85 @@ +package org.commonmark.parser; + +import org.commonmark.internal.inline.InlineContentParser; +import org.commonmark.internal.inline.InlineContentParserFactory; +import org.commonmark.internal.inline.InlineParserState; +import org.commonmark.internal.inline.ParsedInline; +import org.commonmark.node.CustomNode; +import org.commonmark.node.Heading; +import org.commonmark.test.Nodes; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class InlineContentParserTest { + + @Test + public void customInlineContentParser() { + var parser = Parser.builder().customInlineContentParser(new DollarInlineParser.Factory()).build(); + var doc = parser.parse("Test: $hey *there*$ $you$\n\n# Heading $heading$\n"); + var inline1 = Nodes.find(doc, DollarInline.class); + assertEquals("hey *there*", 
inline1.getLiteral()); + + var inline2 = (DollarInline) doc.getFirstChild().getLastChild(); + assertEquals("you", inline2.getLiteral()); + + var heading = Nodes.find(doc, Heading.class); + var inline3 = (DollarInline) heading.getLastChild(); + assertEquals("heading", inline3.getLiteral()); + + // Parser is created for each inline snippet, which is why the index resets for the second snippet. + assertEquals(0, inline1.getIndex()); + assertEquals(1, inline2.getIndex()); + assertEquals(0, inline3.getIndex()); + } + + private static class DollarInline extends CustomNode { + private final String literal; + private final int index; + + public DollarInline(String literal, int index) { + this.literal = literal; + this.index = index; + } + + public String getLiteral() { + return literal; + } + + public int getIndex() { + return index; + } + } + + private static class DollarInlineParser implements InlineContentParser { + + private int index = 0; + + @Override + public ParsedInline tryParse(InlineParserState inlineParserState) { + var scanner = inlineParserState.scanner(); + scanner.next(); + var pos = scanner.position(); + + var end = scanner.find('$'); + if (end == -1) { + return ParsedInline.none(); + } + var content = scanner.getSource(pos, scanner.position()).getContent(); + scanner.next(); + return ParsedInline.of(new DollarInline(content, index++), scanner.position()); + } + + static class Factory implements InlineContentParserFactory { + @Override + public char getTriggerCharacter() { + return '$'; + } + + @Override + public InlineContentParser create() { + return new DollarInlineParser(); + } + } + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java index 7fd875703..e983870c1 100644 --- a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java +++ b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java @@ -1,7 +1,7 
@@ package org.commonmark.test; import org.commonmark.internal.InlineParserImpl; -import org.commonmark.internal.inline.InlineContentParser; +import org.commonmark.internal.inline.InlineContentParserFactory; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParser; import org.commonmark.parser.InlineParserContext; @@ -43,8 +43,8 @@ static class CapturingInlineParserFactory implements InlineParserFactory { public InlineParser create(final InlineParserContext inlineParserContext) { InlineParserContext wrappedContext = new InlineParserContext() { @Override - public List getCustomInlineContentParsers() { - return inlineParserContext.getCustomInlineContentParsers(); + public List getCustomInlineContentParserFactories() { + return inlineParserContext.getCustomInlineContentParserFactories(); } @Override From e7d7bcd258d7bb4cd83c2c8c7d2d6c4b81f9b0dc Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Thu, 25 Apr 2024 23:03:48 +1000 Subject: [PATCH 05/10] Allow to specify multiple trigger characters --- .../java/org/commonmark/internal/InlineParserImpl.java | 9 +++++++-- .../internal/inline/AutolinkInlineParser.java | 5 +++-- .../internal/inline/BackslashInlineParser.java | 5 +++-- .../internal/inline/BackticksInlineParser.java | 6 ++++-- .../commonmark/internal/inline/EntityInlineParser.java | 6 ++++-- .../commonmark/internal/inline/HtmlInlineParser.java | 6 ++++-- .../internal/inline/InlineContentParser.java | 2 +- .../internal/inline/InlineContentParserFactory.java | 5 ++++- .../src/main/java/org/commonmark/parser/Parser.java | 10 ++++------ .../org/commonmark/parser/InlineContentParserTest.java | 6 ++++-- 10 files changed, 38 insertions(+), 22 deletions(-) diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java index 3d26993db..53020ebba 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java +++ 
b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java @@ -102,7 +102,9 @@ private static BitSet calculateSpecialCharacters(Set delimiterCharact bitSet.set(c); } for (var factory : inlineContentParserFactories) { - bitSet.set(factory.getTriggerCharacter()); + for (var c : factory.getTriggerCharacters()) { + bitSet.set(c); + } } bitSet.set('['); bitSet.set(']'); @@ -114,7 +116,10 @@ private static BitSet calculateSpecialCharacters(Set delimiterCharact private Map> createInlineContentParsers() { var map = new HashMap>(); for (var factory : inlineContentParserFactories) { - map.computeIfAbsent(factory.getTriggerCharacter(), k -> new ArrayList<>()).add(factory.create()); + var parser = factory.create(); + for (var c : factory.getTriggerCharacters()) { + map.computeIfAbsent(c, k -> new ArrayList<>()).add(parser); + } } return map; } diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java index 55a0e46a1..dd898fcd1 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java @@ -6,6 +6,7 @@ import org.commonmark.parser.beta.Position; import org.commonmark.parser.beta.Scanner; +import java.util.Set; import java.util.regex.Pattern; /** @@ -49,8 +50,8 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { public static class Factory implements InlineContentParserFactory { @Override - public char getTriggerCharacter() { - return '<'; + public Set getTriggerCharacters() { + return Set.of('<'); } @Override diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java index f2133baaa..70583659f 100644 --- 
a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java @@ -5,6 +5,7 @@ import org.commonmark.node.Text; import org.commonmark.parser.beta.Scanner; +import java.util.Set; import java.util.regex.Pattern; /** @@ -35,8 +36,8 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { public static class Factory implements InlineContentParserFactory { @Override - public char getTriggerCharacter() { - return '\\'; + public Set getTriggerCharacters() { + return Set.of('\\'); } @Override diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java index 80286f578..75025411c 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java @@ -7,6 +7,8 @@ import org.commonmark.parser.beta.Scanner; import org.commonmark.text.Characters; +import java.util.Set; + /** * Attempt to parse backticks, returning either a backtick code span or a literal sequence of backticks. 
*/ @@ -50,8 +52,8 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { public static class Factory implements InlineContentParserFactory { @Override - public char getTriggerCharacter() { - return '`'; + public Set getTriggerCharacters() { + return Set.of('`'); } @Override diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java index 8e45b26ce..6c226c0a5 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java @@ -6,6 +6,8 @@ import org.commonmark.parser.beta.Scanner; import org.commonmark.text.AsciiMatcher; +import java.util.Set; + /** * Attempts to parse an HTML entity or numeric character reference. */ @@ -56,8 +58,8 @@ private ParsedInline entity(Scanner scanner, Position start) { public static class Factory implements InlineContentParserFactory { @Override - public char getTriggerCharacter() { - return '&'; + public Set getTriggerCharacters() { + return Set.of('&'); } @Override diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java index 691946483..79fdd250b 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java @@ -5,6 +5,8 @@ import org.commonmark.parser.beta.Scanner; import org.commonmark.text.AsciiMatcher; +import java.util.Set; + /** * Attempt to parse inline HTML. 
*/ @@ -204,8 +206,8 @@ private static boolean tryDeclaration(Scanner scanner) { public static class Factory implements InlineContentParserFactory { @Override - public char getTriggerCharacter() { - return '<'; + public Set getTriggerCharacters() { + return Set.of('<'); } @Override diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java index e0ab413f3..cab1d467f 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java @@ -9,7 +9,7 @@ public interface InlineContentParser { /** * Try to parse inline content starting from the current position. Note that the character at the current position - * is the {@link InlineContentParserFactory#getTriggerCharacter()} of the factory that created this parser. + * is one of {@link InlineContentParserFactory#getTriggerCharacters()} of the factory that created this parser. *

* For a given inline content snippet that is being parsed, this method can be called multiple times: each time a * trigger character is encountered. diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java index b8d83be66..a5e1fe592 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java @@ -1,12 +1,15 @@ package org.commonmark.internal.inline; +import java.util.Set; + public interface InlineContentParserFactory { /** * An inline content parser needs to have a special "trigger" character which activates it. When this character is * encountered during inline parsing, {@link InlineContentParser#tryParse} is called with the current parser state. + * It can also register for more than one trigger character. */ - char getTriggerCharacter(); + Set getTriggerCharacters(); /** * Create an {@link InlineContentParser} that will do the parsing. Create is called once per text snippet of inline diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index 75547c422..cedc5ac07 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -219,14 +219,12 @@ public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) { } /** - * Add a custom inline content parser, for additional inline parsing or overriding built-in parsing. + * Add a factory for a custom inline content parser, for additional inline parsing or overriding built-in parsing. *

* Note that parsers are triggered based on a special character as specified by - * {@link InlineContentParserFactory#getTriggerCharacter()}. It is possible to register multiple parsers for the same - * character, or even for some built-in special character such as {@code `}. - * - * @param inlineContentParserFactory - * @return + * {@link InlineContentParserFactory#getTriggerCharacters()}. It is possible to register multiple parsers for the same + * character, or even for some built-in special character such as {@code `}. The custom parsers are tried first + * in order in which they are registered, and then the built-in ones. */ public Builder customInlineContentParser(InlineContentParserFactory inlineContentParserFactory) { Objects.requireNonNull(inlineContentParserFactory, "inlineContentParser must not be null"); diff --git a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java index 083914676..212c9a25d 100644 --- a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java +++ b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java @@ -9,6 +9,8 @@ import org.commonmark.test.Nodes; import org.junit.Test; +import java.util.Set; + import static org.junit.Assert.assertEquals; public class InlineContentParserTest { @@ -72,8 +74,8 @@ public ParsedInline tryParse(InlineParserState inlineParserState) { static class Factory implements InlineContentParserFactory { @Override - public char getTriggerCharacter() { - return '$'; + public Set getTriggerCharacters() { + return Set.of('$'); } @Override From eeb077623e47047646130005151faa9376ea8ab4 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Fri, 26 Apr 2024 17:44:19 +1000 Subject: [PATCH 06/10] Move inline content parser to beta API package --- .../commonmark/internal/DocumentParser.java | 2 +- .../internal/InlineParserContextImpl.java | 2 +- 
.../commonmark/internal/InlineParserImpl.java | 2 +- .../internal/inline/AutolinkInlineParser.java | 3 +-- .../inline/BackslashInlineParser.java | 2 +- .../inline/BackticksInlineParser.java | 3 +-- .../internal/inline/EntityInlineParser.java | 3 +-- .../internal/inline/HtmlInlineParser.java | 3 +-- .../internal/inline/ParsedInline.java | 24 ------------------- .../internal/inline/ParsedInlineImpl.java | 5 ++-- .../parser/InlineParserContext.java | 2 +- .../java/org/commonmark/parser/Parser.java | 2 +- .../beta}/InlineContentParser.java | 2 +- .../beta}/InlineContentParserFactory.java | 2 +- .../beta}/InlineParserState.java | 5 +--- .../commonmark/parser/beta/ParsedInline.java | 24 +++++++++++++++++++ .../parser/InlineContentParserTest.java | 8 +++---- .../test/InlineParserContextTest.java | 2 +- 18 files changed, 45 insertions(+), 51 deletions(-) delete mode 100644 commonmark/src/main/java/org/commonmark/internal/inline/ParsedInline.java rename commonmark/src/main/java/org/commonmark/{internal/inline => parser/beta}/InlineContentParser.java (96%) rename commonmark/src/main/java/org/commonmark/{internal/inline => parser/beta}/InlineContentParserFactory.java (94%) rename commonmark/src/main/java/org/commonmark/{internal/inline => parser/beta}/InlineParserState.java (75%) create mode 100644 commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java diff --git a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java index afb6ed9dd..6884c56a9 100644 --- a/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/DocumentParser.java @@ -1,6 +1,6 @@ package org.commonmark.internal; -import org.commonmark.internal.inline.InlineContentParserFactory; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.internal.util.Parsing; import org.commonmark.node.*; import 
org.commonmark.parser.*; diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java index c8d927246..689a5372e 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java @@ -1,6 +1,6 @@ package org.commonmark.internal; -import org.commonmark.internal.inline.InlineContentParserFactory; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.delimiter.DelimiterProcessor; diff --git a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java index 53020ebba..5b91a5a16 100644 --- a/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java @@ -7,7 +7,7 @@ import org.commonmark.parser.InlineParser; import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.SourceLines; -import org.commonmark.parser.beta.Position; +import org.commonmark.parser.beta.*; import org.commonmark.parser.beta.Scanner; import org.commonmark.parser.delimiter.DelimiterProcessor; import org.commonmark.text.Characters; diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java index dd898fcd1..a18966e54 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/AutolinkInlineParser.java @@ -3,8 +3,7 @@ import org.commonmark.node.Link; import org.commonmark.node.Text; import org.commonmark.parser.SourceLines; -import 
org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; import java.util.Set; import java.util.regex.Pattern; diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java index 70583659f..7baeed4de 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackslashInlineParser.java @@ -3,7 +3,7 @@ import org.commonmark.internal.util.Escaping; import org.commonmark.node.HardLineBreak; import org.commonmark.node.Text; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; import java.util.Set; import java.util.regex.Pattern; diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java index 75025411c..b8e8984e8 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/BackticksInlineParser.java @@ -3,8 +3,7 @@ import org.commonmark.node.Code; import org.commonmark.node.Text; import org.commonmark.parser.SourceLines; -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; import org.commonmark.text.Characters; import java.util.Set; diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java index 6c226c0a5..c24e60747 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/EntityInlineParser.java @@ -2,8 +2,7 @@ import org.commonmark.internal.util.Html5Entities; import 
org.commonmark.node.Text; -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; import org.commonmark.text.AsciiMatcher; import java.util.Set; diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java index 79fdd250b..a48ea5022 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java @@ -1,8 +1,7 @@ package org.commonmark.internal.inline; import org.commonmark.node.HtmlInline; -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +import org.commonmark.parser.beta.*; import org.commonmark.text.AsciiMatcher; import java.util.Set; diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInline.java b/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInline.java deleted file mode 100644 index 7223c1687..000000000 --- a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInline.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.commonmark.internal.inline; - -import org.commonmark.node.Node; -import org.commonmark.parser.beta.Position; - -public abstract class ParsedInline { - - protected ParsedInline() { - } - - public static ParsedInline none() { - return null; - } - - public static ParsedInline of(Node node, Position position) { - if (node == null) { - throw new NullPointerException("node must not be null"); - } - if (position == null) { - throw new NullPointerException("position must not be null"); - } - return new ParsedInlineImpl(node, position); - } -} diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java b/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java index 55f9cc4da..a77630610 100644 --- 
a/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/ParsedInlineImpl.java @@ -1,13 +1,14 @@ package org.commonmark.internal.inline; import org.commonmark.node.Node; +import org.commonmark.parser.beta.ParsedInline; import org.commonmark.parser.beta.Position; -public class ParsedInlineImpl extends ParsedInline { +public class ParsedInlineImpl implements ParsedInline { private final Node node; private final Position position; - ParsedInlineImpl(Node node, Position position) { + public ParsedInlineImpl(Node node, Position position) { this.node = node; this.position = position; } diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java index 1c2594033..2b52cb828 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java @@ -1,6 +1,6 @@ package org.commonmark.parser; -import org.commonmark.internal.inline.InlineContentParserFactory; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.delimiter.DelimiterProcessor; diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index cedc5ac07..6d15a7192 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -5,7 +5,7 @@ import org.commonmark.internal.InlineParserContextImpl; import org.commonmark.internal.InlineParserImpl; import org.commonmark.internal.LinkReferenceDefinitions; -import org.commonmark.internal.inline.InlineContentParserFactory; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.node.*; import org.commonmark.parser.block.BlockParserFactory; 
import org.commonmark.parser.delimiter.DelimiterProcessor; diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java similarity index 96% rename from commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java rename to commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java index cab1d467f..bc5c9a54f 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParser.java +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParser.java @@ -1,4 +1,4 @@ -package org.commonmark.internal.inline; +package org.commonmark.parser.beta; /** * Parser for a type of inline content. Registered via a {@link InlineContentParserFactory} and created by its diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java similarity index 94% rename from commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java rename to commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java index a5e1fe592..54a5d7f6f 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/InlineContentParserFactory.java +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java @@ -1,4 +1,4 @@ -package org.commonmark.internal.inline; +package org.commonmark.parser.beta; import java.util.Set; diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java similarity index 75% rename from commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java rename to commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java index ba7369617..e434d45d6 100644 --- 
a/commonmark/src/main/java/org/commonmark/internal/inline/InlineParserState.java +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineParserState.java @@ -1,7 +1,4 @@ -package org.commonmark.internal.inline; - -import org.commonmark.parser.beta.Position; -import org.commonmark.parser.beta.Scanner; +package org.commonmark.parser.beta; public interface InlineParserState { diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java b/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java new file mode 100644 index 000000000..5d1402cae --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/parser/beta/ParsedInline.java @@ -0,0 +1,24 @@ +package org.commonmark.parser.beta; + +import org.commonmark.internal.inline.ParsedInlineImpl; +import org.commonmark.node.Node; + +import java.util.Objects; + +/** + * The result of a single inline parser. Use the static methods to create instances. + *

+ * This interface is not intended to be implemented by clients. + */ +public interface ParsedInline { + + static ParsedInline none() { + return null; + } + + static ParsedInline of(Node node, Position position) { + Objects.requireNonNull(node, "node must not be null"); + Objects.requireNonNull(position, "position must not be null"); + return new ParsedInlineImpl(node, position); + } +} diff --git a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java index 212c9a25d..e54aebb26 100644 --- a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java +++ b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java @@ -1,9 +1,9 @@ package org.commonmark.parser; -import org.commonmark.internal.inline.InlineContentParser; -import org.commonmark.internal.inline.InlineContentParserFactory; -import org.commonmark.internal.inline.InlineParserState; -import org.commonmark.internal.inline.ParsedInline; +import org.commonmark.parser.beta.InlineContentParser; +import org.commonmark.parser.beta.InlineContentParserFactory; +import org.commonmark.parser.beta.InlineParserState; +import org.commonmark.parser.beta.ParsedInline; import org.commonmark.node.CustomNode; import org.commonmark.node.Heading; import org.commonmark.test.Nodes; diff --git a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java index e983870c1..9fa7fb0da 100644 --- a/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java +++ b/commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java @@ -1,7 +1,7 @@ package org.commonmark.test; import org.commonmark.internal.InlineParserImpl; -import org.commonmark.internal.inline.InlineContentParserFactory; +import org.commonmark.parser.beta.InlineContentParserFactory; import 
org.commonmark.node.LinkReferenceDefinition; import org.commonmark.parser.InlineParser; import org.commonmark.parser.InlineParserContext; From d876efe7e10a242c1129f042f3049ceefdb95c53 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Fri, 26 Apr 2024 17:47:43 +1000 Subject: [PATCH 07/10] Rename Parser.Builder method to align with type --- .../java/org/commonmark/parser/InlineParserContext.java | 8 +++++--- .../src/main/java/org/commonmark/parser/Parser.java | 4 ++-- .../org/commonmark/parser/InlineContentParserTest.java | 6 +++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java index 2b52cb828..dde86b311 100644 --- a/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java +++ b/commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java @@ -1,7 +1,7 @@ package org.commonmark.parser; -import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.node.LinkReferenceDefinition; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; import java.util.List; @@ -12,12 +12,14 @@ public interface InlineParserContext { /** - * @return custom inline content parsers that have been configured with {@link Parser.Builder#customInlineContentParser(InlineContentParserFactory)} + * @return custom inline content parsers that have been configured with + * {@link Parser.Builder#customInlineContentParserFactory(InlineContentParserFactory)} */ List getCustomInlineContentParserFactories(); /** - * @return custom delimiter processors that have been configured with {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)} + * @return custom delimiter processors that have been configured with + * {@link Parser.Builder#customDelimiterProcessor(DelimiterProcessor)} */ List getCustomDelimiterProcessors(); diff 
--git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index 6d15a7192..8d9eb3376 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -5,8 +5,8 @@ import org.commonmark.internal.InlineParserContextImpl; import org.commonmark.internal.InlineParserImpl; import org.commonmark.internal.LinkReferenceDefinitions; -import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.node.*; +import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.parser.block.BlockParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -226,7 +226,7 @@ public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) { * character, or even for some built-in special character such as {@code `}. The custom parsers are tried first * in order in which they are registered, and then the built-in ones. 
*/ - public Builder customInlineContentParser(InlineContentParserFactory inlineContentParserFactory) { + public Builder customInlineContentParserFactory(InlineContentParserFactory inlineContentParserFactory) { Objects.requireNonNull(inlineContentParserFactory, "inlineContentParser must not be null"); inlineContentParserFactories.add(inlineContentParserFactory); return this; diff --git a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java index e54aebb26..28e9b5748 100644 --- a/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java +++ b/commonmark/src/test/java/org/commonmark/parser/InlineContentParserTest.java @@ -1,11 +1,11 @@ package org.commonmark.parser; +import org.commonmark.node.CustomNode; +import org.commonmark.node.Heading; import org.commonmark.parser.beta.InlineContentParser; import org.commonmark.parser.beta.InlineContentParserFactory; import org.commonmark.parser.beta.InlineParserState; import org.commonmark.parser.beta.ParsedInline; -import org.commonmark.node.CustomNode; -import org.commonmark.node.Heading; import org.commonmark.test.Nodes; import org.junit.Test; @@ -17,7 +17,7 @@ public class InlineContentParserTest { @Test public void customInlineContentParser() { - var parser = Parser.builder().customInlineContentParser(new DollarInlineParser.Factory()).build(); + var parser = Parser.builder().customInlineContentParserFactory(new DollarInlineParser.Factory()).build(); var doc = parser.parse("Test: $hey *there*$ $you$\n\n# Heading $heading$\n"); var inline1 = Nodes.find(doc, DollarInline.class); assertEquals("hey *there*", inline1.getLiteral()); From f48193550ae148e18afe1f8e3ce46d3c4e2823b8 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Fri, 26 Apr 2024 17:53:18 +1000 Subject: [PATCH 08/10] Add CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
df6beb202..2fc29ab3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ This project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html with the exception that 0.x versions can break between minor versions. ## Unreleased +### Added +- Support for extending inline parsing with custom inline content parsers! See + `Parser.Builder#customInlineContentParserFactory`. This allows users or + extensions to hook into inline parsing on a deeper level than using delimiter + processors. It could be used to implement support for math/latex formulas for + example. ### Fixed - Fix parsing of link reference definitions where it looks like it has a title but it doesn't because it's followed by characters other than space/tab. In that From 0fd2427f4061b5dedfb4f9aff8b7bd0797da9d4f Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Fri, 26 Apr 2024 19:02:49 +1000 Subject: [PATCH 09/10] Add some more Javadoc with links --- commonmark/src/main/java/org/commonmark/parser/Parser.java | 7 +++++-- .../commonmark/parser/beta/InlineContentParserFactory.java | 5 +++++ .../commonmark/parser/delimiter/DelimiterProcessor.java | 2 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/commonmark/src/main/java/org/commonmark/parser/Parser.java b/commonmark/src/main/java/org/commonmark/parser/Parser.java index 8d9eb3376..febe05b7c 100644 --- a/commonmark/src/main/java/org/commonmark/parser/Parser.java +++ b/commonmark/src/main/java/org/commonmark/parser/Parser.java @@ -219,7 +219,7 @@ public Builder customBlockParserFactory(BlockParserFactory blockParserFactory) { } /** - * Add a factory for a custom inline content parser, for additional inline parsing or overriding built-in parsing. + * Add a factory for a custom inline content parser, for extending inline parsing or overriding built-in parsing. *

* Note that parsers are triggered based on a special character as specified by * {@link InlineContentParserFactory#getTriggerCharacters()}. It is possible to register multiple parsers for the same @@ -233,11 +233,14 @@ public Builder customInlineContentParserFactory(InlineContentParserFactory inlin } /** - * Add a custom delimiter processor. + * Add a custom delimiter processor for inline parsing. *

* Note that multiple delimiter processors with the same characters can be added, as long as they have a * different minimum length. In that case, the processor with the shortest matching length is used. Adding more * than one delimiter processor with the same character and minimum length is invalid. + *

+ * If you want more control over how parsing is done, you might want to use + * {@link #customInlineContentParserFactory} instead. * * @param delimiterProcessor a delimiter processor implementation * @return {@code this} diff --git a/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java index 54a5d7f6f..c86f93a41 100644 --- a/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java +++ b/commonmark/src/main/java/org/commonmark/parser/beta/InlineContentParserFactory.java @@ -2,6 +2,11 @@ import java.util.Set; +/** + * A factory for extending inline content parsing. + *

+ * See {@link org.commonmark.parser.Parser.Builder#customInlineContentParserFactory} for how to register it. + */ public interface InlineContentParserFactory { /** diff --git a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java index 897943d66..3b6abf214 100644 --- a/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java +++ b/commonmark/src/main/java/org/commonmark/parser/delimiter/DelimiterProcessor.java @@ -6,6 +6,8 @@ * Custom delimiter processor for additional delimiters besides {@code _} and {@code *}. *

* Note that implementations of this need to be thread-safe, the same instance may be used by multiple parsers. + * + * @see org.commonmark.parser.beta.InlineContentParserFactory */ public interface DelimiterProcessor { From 6b16c69326710732d0ba94520c875b9798e30039 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Fri, 26 Apr 2024 19:03:18 +1000 Subject: [PATCH 10/10] README: Add section about customizing parsing --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index e4b07bfdc..bcf587f54 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,19 @@ elements in the resulting HTML, you can create your own subclass of To define the HTML rendering for them, you can use a `NodeRenderer` as explained above. +#### Customize parsing + +There are a few ways to extend parsing or even override built-in parsing, +all of them via methods on `Parser.Builder` +(see [Blocks and inlines](https://spec.commonmark.org/0.31.2/#blocks-and-inlines) in the spec for an overview of blocks/inlines): + +- Parsing of specific block types (e.g. headings, code blocks, etc) can be + enabled/disabled with `enabledBlockTypes` +- Parsing of blocks can be extended/overridden with `customBlockParserFactory` +- Parsing of inline content can be extended/overridden with `customInlineContentParserFactory` +- Parsing of [delimiters](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis) in inline content can be + extended with `customDelimiterProcessor` + #### Thread-safety Both the `Parser` and `HtmlRenderer` are designed so that you can