diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a57773ac6c..7a4e6b6a78 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,12 +12,12 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macOS-latest] # choosing to run a reduced set of LTS, current, and next, to balance coverage and execution time - java: [8, 17, 20] + java: [8, 17, 21] fail-fast: false name: Test JDK ${{ matrix.java }}, ${{ matrix.os }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} uses: actions/setup-java@v3 diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index 3d265ab557..23df68aa2c 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -19,7 +19,7 @@ jobs: dry-run: false language: jvm - name: Upload Crash - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v3 if: failure() && steps.build.outcome == 'success' with: name: artifacts diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7eaa5be624..0eea129b29 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -14,7 +14,7 @@ jobs: name: "CodeQL" steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up JDK uses: actions/setup-java@v3 with: diff --git a/CHANGES b/CHANGES index 9f05522ca6..4eaed85ac6 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,84 @@ jsoup changelog -Release 1.16.2 [PENDING] +Release 1.17.1 [PENDING] + * Improvement: in Jsoup.connect(), added support for request-level authentication, supporting authentication to + proxies and to servers. + + + * Improvement: in the Elements list, added direct support for `#set(index, element)`, `#remove(index)`, + `#remove(object)`, `#clear()`, `#removeAll(collection)`, `#retainAll(collection)`, `#removeIf(filter)`, + `#replaceAll(operator)`. These methods update the original DOM, as well as the Elements list. 
+ + + * Improvement: added the NodeIterator class, to efficiently traverse a node tree using the Iterator interface. And + added Stream Element#stream() and Node#nodeStream() methods, to enable fluent composable stream pipelines of node + traversals. + + + * Improvement: when changing the OutputSettings syntax to XML, the xhtml EscapeMode is automatically set by default. + + * Improvement: added the `:is(selector list)` pseudo-selector, which finds elements that match any of the selectors in + the selector list. Useful for making large ORed selectors more readable. + + * Improvement: repackaged the library with native (vs automatic) JPMS module support. + + + * Improvement: better fidelity of source positions when tracking is enabled. And implicitly created or closed elements + are tracked and detectable via Range.isImplicit(). + + + * Improvement: when source tracking is enabled, the source position for attribute names and values is now available. + Attribute#sourceRange() provides the ranges. + + + * Improvement: when running concurrently under Java 21+ Virtual Threads, virtual threads could be pinned to their + carrier platform thread when parsing an input stream. To improve performance, particularly when parsing fetched + URLs, the internal ConstrainableInputStream has been replaced by ControllableInputStream, which avoids the locking + which caused that pinning. + + + * Improvement: in Jsoup.Connect, allow any XML mimetype as a supported mimetype. Was previously limited to + `{application|text}/xml`. This enables e.g. fetching SVGs with an image/svg+xml mimetype, without having to + disable mimetype validation. 
+ + + * Bugfix: when outputting with XML syntax, HTML elements that were parsed as data nodes ( - - Token.StartTag startPending = new Token.StartTag(); - Token.EndTag endPending = new Token.EndTag(); - Token.Tag tagPending = startPending; // tag we are building up: start or end pending - Token.Character charPending = new Token.Character(); - Token.Doctype doctypePending = new Token.Doctype(); // doctype building up - Token.Comment commentPending = new Token.Comment(); // comment building up + final StringBuilder dataBuffer = new StringBuilder(1024); // buffers data looking for + + final Token.StartTag startPending; + final Token.EndTag endPending = new Token.EndTag(); + Token.Tag tagPending; // tag we are building up: start or end pending + final Token.Character charPending = new Token.Character(); + final Token.Doctype doctypePending = new Token.Doctype(); // doctype building up + final Token.Comment commentPending = new Token.Comment(); // comment building up @Nullable private String lastStartTag; // the last start tag emitted, to test appropriate end tag @Nullable private String lastStartCloseSeq; // " 0) - t.tagPending.appendAttributeValue(value); + t.tagPending.appendAttributeValue(value, pos, r.pos()); else t.tagPending.setEmptyAttributeValue(); + pos = r.pos(); char c = r.consume(); switch (c) { case '"': @@ -755,31 +759,33 @@ void read(Tokeniser t, CharacterReader r) { case '&': int[] ref = t.consumeCharacterReference('"', true); if (ref != null) - t.tagPending.appendAttributeValue(ref); + t.tagPending.appendAttributeValue(ref, pos, r.pos()); else - t.tagPending.appendAttributeValue('&'); + t.tagPending.appendAttributeValue('&', pos, r.pos()); break; case nullChar: t.error(this); - t.tagPending.appendAttributeValue(replacementChar); + t.tagPending.appendAttributeValue(replacementChar, pos, r.pos()); break; case eof: t.eofError(this); t.transition(Data); break; default: // hit end of buffer in first read, still in attribute - 
t.tagPending.appendAttributeValue(c); + t.tagPending.appendAttributeValue(c, pos, r.pos()); } } }, AttributeValue_singleQuoted { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { + int pos = r.pos(); String value = r.consumeAttributeQuoted(true); if (value.length() > 0) - t.tagPending.appendAttributeValue(value); + t.tagPending.appendAttributeValue(value, pos, r.pos()); else t.tagPending.setEmptyAttributeValue(); + pos = r.pos(); char c = r.consume(); switch (c) { case '\'': @@ -788,29 +794,31 @@ void read(Tokeniser t, CharacterReader r) { case '&': int[] ref = t.consumeCharacterReference('\'', true); if (ref != null) - t.tagPending.appendAttributeValue(ref); + t.tagPending.appendAttributeValue(ref, pos, r.pos()); else - t.tagPending.appendAttributeValue('&'); + t.tagPending.appendAttributeValue('&', pos, r.pos()); break; case nullChar: t.error(this); - t.tagPending.appendAttributeValue(replacementChar); + t.tagPending.appendAttributeValue(replacementChar, pos, r.pos()); break; case eof: t.eofError(this); t.transition(Data); break; default: // hit end of buffer in first read, still in attribute - t.tagPending.appendAttributeValue(c); + t.tagPending.appendAttributeValue(c, pos, r.pos()); } } }, AttributeValue_unquoted { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { + int pos = r.pos(); String value = r.consumeToAnySorted(attributeValueUnquoted); if (value.length() > 0) - t.tagPending.appendAttributeValue(value); + t.tagPending.appendAttributeValue(value, pos, r.pos()); + pos = r.pos(); char c = r.consume(); switch (c) { case '\t': @@ -823,9 +831,9 @@ void read(Tokeniser t, CharacterReader r) { case '&': int[] ref = t.consumeCharacterReference('>', true); if (ref != null) - t.tagPending.appendAttributeValue(ref); + t.tagPending.appendAttributeValue(ref, pos, r.pos()); else - t.tagPending.appendAttributeValue('&'); + t.tagPending.appendAttributeValue('&', pos, 
r.pos()); break; case '>': t.emitTagPending(); @@ -833,7 +841,7 @@ void read(Tokeniser t, CharacterReader r) { break; case nullChar: t.error(this); - t.tagPending.appendAttributeValue(replacementChar); + t.tagPending.appendAttributeValue(replacementChar, pos, r.pos()); break; case eof: t.eofError(this); @@ -845,17 +853,17 @@ void read(Tokeniser t, CharacterReader r) { case '=': case '`': t.error(this); - t.tagPending.appendAttributeValue(c); + t.tagPending.appendAttributeValue(c, pos, r.pos()); break; default: // hit end of buffer in first read, still in attribute - t.tagPending.appendAttributeValue(c); + t.tagPending.appendAttributeValue(c, pos, r.pos()); } } }, // CharacterReferenceInAttributeValue state handled inline AfterAttributeValue_quoted { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -885,7 +893,7 @@ void read(Tokeniser t, CharacterReader r) { } }, SelfClosingStartTag { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '>': @@ -905,7 +913,7 @@ void read(Tokeniser t, CharacterReader r) { } }, BogusComment { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { // todo: handle bogus comment starting from eof. when does that trigger? 
t.commentPending.append(r.consumeTo('>')); // todo: replace nullChar with replaceChar @@ -918,7 +926,7 @@ void read(Tokeniser t, CharacterReader r) { } }, MarkupDeclarationOpen { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { if (r.matchConsume("--")) { t.createCommentPending(); t.transition(CommentStart); @@ -938,7 +946,7 @@ void read(Tokeniser t, CharacterReader r) { } }, CommentStart { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '-': @@ -966,7 +974,7 @@ void read(Tokeniser t, CharacterReader r) { } }, CommentStartDash { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '-': @@ -994,7 +1002,7 @@ void read(Tokeniser t, CharacterReader r) { } }, Comment { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.current(); switch (c) { case '-': @@ -1016,7 +1024,7 @@ void read(Tokeniser t, CharacterReader r) { } }, CommentEndDash { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '-': @@ -1039,7 +1047,7 @@ void read(Tokeniser t, CharacterReader r) { } }, CommentEnd { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '>': @@ -1069,7 +1077,7 @@ void read(Tokeniser t, CharacterReader r) { } }, CommentEndBang { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '-': @@ -1097,7 +1105,7 @@ void read(Tokeniser t, CharacterReader r) { } }, Doctype { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -1124,7 
+1132,7 @@ void read(Tokeniser t, CharacterReader r) { } }, BeforeDoctypeName { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { if (r.matchesAsciiAlpha()) { t.createDoctypePending(); t.transition(DoctypeName); @@ -1159,7 +1167,7 @@ void read(Tokeniser t, CharacterReader r) { } }, DoctypeName { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { String name = r.consumeLetterSequence(); t.doctypePending.name.append(name); @@ -1194,7 +1202,7 @@ void read(Tokeniser t, CharacterReader r) { } }, AfterDoctypeName { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { if (r.isEmpty()) { t.eofError(this); t.doctypePending.forceQuirks = true; @@ -1222,7 +1230,7 @@ else if (r.matches('>')) { } }, AfterDoctypePublicKeyword { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -1262,7 +1270,7 @@ void read(Tokeniser t, CharacterReader r) { } }, BeforeDoctypePublicIdentifier { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -1299,7 +1307,7 @@ void read(Tokeniser t, CharacterReader r) { } }, DoctypePublicIdentifier_doubleQuoted { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '"': @@ -1327,7 +1335,7 @@ void read(Tokeniser t, CharacterReader r) { } }, DoctypePublicIdentifier_singleQuoted { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\'': @@ -1355,7 +1363,7 @@ void read(Tokeniser t, CharacterReader r) { } }, AfterDoctypePublicIdentifier { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, 
CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -1393,7 +1401,7 @@ void read(Tokeniser t, CharacterReader r) { } }, BetweenDoctypePublicAndSystemIdentifiers { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -1430,7 +1438,7 @@ void read(Tokeniser t, CharacterReader r) { } }, AfterDoctypeSystemKeyword { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -1470,7 +1478,7 @@ void read(Tokeniser t, CharacterReader r) { } }, BeforeDoctypeSystemIdentifier { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -1507,7 +1515,7 @@ void read(Tokeniser t, CharacterReader r) { } }, DoctypeSystemIdentifier_doubleQuoted { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '"': @@ -1535,7 +1543,7 @@ void read(Tokeniser t, CharacterReader r) { } }, DoctypeSystemIdentifier_singleQuoted { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\'': @@ -1563,7 +1571,7 @@ void read(Tokeniser t, CharacterReader r) { } }, AfterDoctypeSystemIdentifier { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': @@ -1590,7 +1598,7 @@ void read(Tokeniser t, CharacterReader r) { } }, BogusDoctype { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '>': @@ -1608,7 +1616,7 @@ void read(Tokeniser t, CharacterReader r) { } }, CdataSection { - void read(Tokeniser t, CharacterReader r) { + @Override void read(Tokeniser t, 
CharacterReader r) { String data = r.consumeTo("]]>"); t.dataBuffer.append(data); if (r.matchConsume("]]>") || r.isEmpty()) { diff --git a/src/main/java/org/jsoup/parser/TreeBuilder.java b/src/main/java/org/jsoup/parser/TreeBuilder.java index 6c9a78b5bc..eadfa28447 100644 --- a/src/main/java/org/jsoup/parser/TreeBuilder.java +++ b/src/main/java/org/jsoup/parser/TreeBuilder.java @@ -1,14 +1,13 @@ package org.jsoup.parser; import org.jsoup.helper.Validate; +import org.jsoup.internal.SharedConstants; import org.jsoup.nodes.Attributes; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.Range; -import javax.annotation.Nullable; -import javax.annotation.ParametersAreNonnullByDefault; import java.io.Reader; import java.util.ArrayList; import java.util.HashMap; @@ -24,21 +23,20 @@ abstract class TreeBuilder { protected Parser parser; CharacterReader reader; Tokeniser tokeniser; - protected Document doc; // current doc we are building into - protected ArrayList stack; // the stack of open elements - protected String baseUri; // current base uri, for creating new elements - protected Token currentToken; // currentToken is used only for error tracking. - protected ParseSettings settings; - protected Map seenTags; // tags we've used in this parse; saves tag GC for custom tags. - - private Token.StartTag start = new Token.StartTag(); // start tag to process - private Token.EndTag end = new Token.EndTag(); + Document doc; // current doc we are building into + ArrayList stack; // the stack of open elements + String baseUri; // current base uri, for creating new elements + Token currentToken; // currentToken is used only for error tracking. + ParseSettings settings; + Map seenTags; // tags we've used in this parse; saves tag GC for custom tags. 
+ + private Token.StartTag start; // start tag to process + private final Token.EndTag end = new Token.EndTag(); abstract ParseSettings defaultSettings(); private boolean trackSourceRange; // optionally tracks the source range of nodes - @ParametersAreNonnullByDefault - protected void initialiseParse(Reader input, String baseUri, Parser parser) { + void initialiseParse(Reader input, String baseUri, Parser parser) { Validate.notNullParam(input, "input"); Validate.notNullParam(baseUri, "baseUri"); Validate.notNull(parser); @@ -51,13 +49,13 @@ protected void initialiseParse(Reader input, String baseUri, Parser parser) { trackSourceRange = parser.isTrackPosition(); reader.trackNewlines(parser.isTrackErrors() || trackSourceRange); // when tracking errors or source ranges, enable newline tracking for better legibility currentToken = null; - tokeniser = new Tokeniser(reader, parser.getErrors()); + tokeniser = new Tokeniser(reader, parser.getErrors(), trackSourceRange); stack = new ArrayList<>(32); seenTags = new HashMap<>(); + start = new Token.StartTag(trackSourceRange, reader); this.baseUri = baseUri; } - @ParametersAreNonnullByDefault Document parse(Reader input, String baseUri, Parser parser) { initialiseParse(input, baseUri, parser); runParser(); @@ -80,55 +78,77 @@ Document parse(Reader input, String baseUri, Parser parser) { abstract List parseFragment(String inputFragment, Element context, String baseUri, Parser parser); - protected void runParser() { + void runParser() { final Tokeniser tokeniser = this.tokeniser; final Token.TokenType eof = Token.TokenType.EOF; while (true) { Token token = tokeniser.read(); + currentToken = token; process(token); - token.reset(); - if (token.type == eof) break; + token.reset(); } + + // once we hit the end, pop remaining items off the stack + while (!stack.isEmpty()) pop(); } - protected abstract boolean process(Token token); + abstract boolean process(Token token); - protected boolean processStartTag(String name) { + boolean 
processStartTag(String name) { // these are "virtual" start tags (auto-created by the treebuilder), so not tracking the start position final Token.StartTag start = this.start; if (currentToken == start) { // don't recycle an in-use token - return process(new Token.StartTag().name(name)); + return process(new Token.StartTag(trackSourceRange, reader).name(name)); } return process(start.reset().name(name)); } - public boolean processStartTag(String name, Attributes attrs) { + boolean processStartTag(String name, Attributes attrs) { final Token.StartTag start = this.start; if (currentToken == start) { // don't recycle an in-use token - return process(new Token.StartTag().nameAttr(name, attrs)); + return process(new Token.StartTag(trackSourceRange, reader).nameAttr(name, attrs)); } start.reset(); start.nameAttr(name, attrs); return process(start); } - protected boolean processEndTag(String name) { + boolean processEndTag(String name) { if (currentToken == end) { // don't recycle an in-use token return process(new Token.EndTag().name(name)); } return process(end.reset().name(name)); } + /** + Removes the last Element from the stack, hits onNodeClosed, and then returns it. + * @return + */ + final Element pop() { + int size = stack.size(); + Element removed = stack.remove(size - 1); + onNodeClosed(removed); + return removed; + } + + /** + Adds the specified Element to the end of the stack, and hits onNodeInserted. + * @param element + */ + final void push(Element element) { + stack.add(element); + onNodeInserted(element); + } /** Get the current element (last on the stack). If all items have been removed, returns the document instead (which might not actually be on the stack; use stack.size() == 0 to test if required. @return the last element on the stack, if any; or the root document */ - protected Element currentElement() { + Element currentElement() { int size = stack.size(); return size > 0 ? 
stack.get(size-1) : doc; } @@ -138,7 +158,7 @@ protected Element currentElement() { @param normalName name to check @return true if there is a current element on the stack, and its name equals the supplied */ - protected boolean currentElementIs(String normalName) { + boolean currentElementIs(String normalName) { if (stack.size() == 0) return false; Element current = currentElement(); @@ -152,7 +172,7 @@ protected boolean currentElementIs(String normalName) { @param namespace the namespace @return true if there is a current element on the stack, and its name equals the supplied */ - protected boolean currentElementIs(String normalName, String namespace) { + boolean currentElementIs(String normalName, String namespace) { if (stack.size() == 0) return false; Element current = currentElement(); @@ -164,7 +184,7 @@ protected boolean currentElementIs(String normalName, String namespace) { * If the parser is tracking errors, add an error at the current position. * @param msg error message */ - protected void error(String msg) { + void error(String msg) { error(msg, (Object[]) null); } @@ -173,7 +193,7 @@ protected void error(String msg) { * @param msg error message template * @param args template arguments */ - protected void error(String msg, Object... args) { + void error(String msg, Object... args) { ParseErrorList errors = parser.getErrors(); if (errors.canAddError()) errors.add(new ParseError(reader, msg, args)); @@ -183,11 +203,11 @@ protected void error(String msg, Object... args) { (An internal method, visible for Element. For HTML parse, signals that script and style text should be treated as Data Nodes). 
*/ - protected boolean isContentForTagData(String normalName) { + boolean isContentForTagData(String normalName) { return false; } - protected Tag tagFor(String tagName, String namespace, ParseSettings settings) { + Tag tagFor(String tagName, String namespace, ParseSettings settings) { Tag cached = seenTags.get(tagName); // note that we don't normalize the cache key. But tag via valueOf may be normalized. if (cached == null || !cached.namespace().equals(namespace)) { // only return from cache if the namespace is the same. not running nested cache to save double hit on the common flow @@ -198,7 +218,7 @@ protected Tag tagFor(String tagName, String namespace, ParseSettings settings) { return cached; } - protected Tag tagFor(String tagName, ParseSettings settings) { + Tag tagFor(String tagName, ParseSettings settings) { return tagFor(tagName, defaultNamespace(), settings); } @@ -206,40 +226,58 @@ protected Tag tagFor(String tagName, ParseSettings settings) { Gets the default namespace for this TreeBuilder * @return the default namespace */ - protected String defaultNamespace() { + String defaultNamespace() { return NamespaceHtml; } /** Called by implementing TreeBuilders when a node has been inserted. This implementation includes optionally tracking - the source range of the node. - * @param node the node that was just inserted - * @param token the (optional) token that created this node + the source range of the node. @param node the node that was just inserted */ - protected void onNodeInserted(Node node, @Nullable Token token) { - trackNodePosition(node, token, true); + void onNodeInserted(Node node) { + trackNodePosition(node, true); } /** Called by implementing TreeBuilders when a node is explicitly closed. This implementation includes optionally - tracking the closing source range of the node. - * @param node the node being closed - * @param token the end-tag token that closed this node + tracking the closing source range of the node. 
@param node the node being closed */ - protected void onNodeClosed(Node node, Token token) { - trackNodePosition(node, token, false); + void onNodeClosed(Node node) { + trackNodePosition(node, false); } - private void trackNodePosition(Node node, @Nullable Token token, boolean start) { - if (trackSourceRange && token != null) { - int startPos = token.startPos(); - if (startPos == Token.Unset) return; // untracked, virtual token - - Range.Position startRange = new Range.Position(startPos, reader.lineNumber(startPos), reader.columnNumber(startPos)); - int endPos = token.endPos(); - Range.Position endRange = new Range.Position(endPos, reader.lineNumber(endPos), reader.columnNumber(endPos)); - Range range = new Range(startRange, endRange); - range.track(node, start); + private void trackNodePosition(Node node, boolean isStart) { + if (!trackSourceRange) return; + + final Token token = currentToken; + int startPos = token.startPos(); + int endPos = token.endPos(); + + // handle implicit element open / closes. + if (node instanceof Element) { + final Element el = (Element) node; + if (token.isEOF()) { + if (el.endSourceRange().isTracked()) + return; // /body and /html are left on stack until EOF, don't reset them + startPos = endPos = reader.pos(); + } else if (isStart) { // opening tag + if (!token.isStartTag() || !el.normalName().equals(token.asStartTag().normalName)) { + endPos = startPos; + } + } else { // closing tag + if (!el.tag().isEmpty() && !el.tag().isSelfClosing()) { + if (!token.isEndTag() || !el.normalName().equals(token.asEndTag().normalName)) { + endPos = startPos; + } + } + } } + + Range.Position startPosition = new Range.Position + (startPos, reader.lineNumber(startPos), reader.columnNumber(startPos)); + Range.Position endPosition = new Range.Position + (endPos, reader.lineNumber(endPos), reader.columnNumber(endPos)); + Range range = new Range(startPosition, endPosition); + node.attributes().userData(isStart ? 
SharedConstants.RangeKey : SharedConstants.EndRangeKey, range); } } diff --git a/src/main/java/org/jsoup/parser/XmlTreeBuilder.java b/src/main/java/org/jsoup/parser/XmlTreeBuilder.java index e1999f9679..8ef653eee3 100644 --- a/src/main/java/org/jsoup/parser/XmlTreeBuilder.java +++ b/src/main/java/org/jsoup/parser/XmlTreeBuilder.java @@ -7,11 +7,11 @@ import org.jsoup.nodes.DocumentType; import org.jsoup.nodes.Element; import org.jsoup.nodes.Entities; +import org.jsoup.nodes.LeafNode; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.jsoup.nodes.XmlDeclaration; -import javax.annotation.ParametersAreNonnullByDefault; import java.io.Reader; import java.io.StringReader; import java.util.List; @@ -26,14 +26,14 @@ * @author Jonathan Hedley */ public class XmlTreeBuilder extends TreeBuilder { - ParseSettings defaultSettings() { + @Override ParseSettings defaultSettings() { return ParseSettings.preserveCase; } - @Override @ParametersAreNonnullByDefault + @Override protected void initialiseParse(Reader input, String baseUri, Parser parser) { super.initialiseParse(input, baseUri, parser); - stack.add(doc); // place the document onto the stack. differs from HtmlTreeBuilder (not on stack) + stack.add(doc); // place the document onto the stack. differs from HtmlTreeBuilder (not on stack). Note not push()ed, so not onNodeInserted. 
doc.outputSettings() .syntax(Document.OutputSettings.Syntax.xml) .escapeMode(Entities.EscapeMode.xhtml) @@ -59,22 +59,24 @@ XmlTreeBuilder newInstance() { @Override protected boolean process(Token token) { + currentToken = token; + // start tag, end tag, doctype, comment, character, eof switch (token.type) { case StartTag: - insert(token.asStartTag()); + insertElementFor(token.asStartTag()); break; case EndTag: popStackToClose(token.asEndTag()); break; case Comment: - insert(token.asComment()); + insertCommentFor(token.asComment()); break; case Character: - insert(token.asCharacter()); + insertCharacterFor(token.asCharacter()); break; case Doctype: - insert(token.asDoctype()); + insertDoctypeFor(token.asDoctype()); break; case EOF: // could put some normalisation here if desired break; @@ -84,53 +86,63 @@ protected boolean process(Token token) { return true; } - protected void insertNode(Node node) { - currentElement().appendChild(node); - onNodeInserted(node, null); - } - - protected void insertNode(Node node, Token token) { - currentElement().appendChild(node); - onNodeInserted(node, token); - } - - Element insert(Token.StartTag startTag) { + void insertElementFor(Token.StartTag startTag) { Tag tag = tagFor(startTag.name(), settings); - if (startTag.hasAttributes()) + if (startTag.attributes != null) startTag.attributes.deduplicate(settings); Element el = new Element(tag, null, settings.normalizeAttributes(startTag.attributes)); - insertNode(el, startTag); + currentElement().appendChild(el); + push(el); + if (startTag.isSelfClosing()) { tag.setSelfClosing(); - } else { - stack.add(el); + pop(); // push & pop ensures onNodeInserted & onNodeClosed } - return el; } - void insert(Token.Comment commentToken) { + void insertLeafNode(LeafNode node) { + currentElement().appendChild(node); + onNodeInserted(node); + } + + void insertCommentFor(Token.Comment commentToken) { Comment comment = new Comment(commentToken.getData()); - Node insert = comment; + LeafNode insert = 
comment; if (commentToken.bogus && comment.isXmlDeclaration()) { // xml declarations are emitted as bogus comments (which is right for html, but not xml) // so we do a bit of a hack and parse the data as an element to pull the attributes out + // todo - refactor this to parse more appropriately XmlDeclaration decl = comment.asXmlDeclaration(); // else, we couldn't parse it as a decl, so leave as a comment if (decl != null) insert = decl; } - insertNode(insert, commentToken); + insertLeafNode(insert); } - void insert(Token.Character token) { + void insertCharacterFor(Token.Character token) { final String data = token.getData(); - insertNode(token.isCData() ? new CDataNode(data) : new TextNode(data), token); + insertLeafNode(token.isCData() ? new CDataNode(data) : new TextNode(data)); + } + + void insertDoctypeFor(Token.Doctype token) { + DocumentType doctypeNode = new DocumentType(settings.normalizeTag(token.getName()), token.getPublicIdentifier(), token.getSystemIdentifier()); + doctypeNode.setPubSysKey(token.getPubSysKey()); + insertLeafNode(doctypeNode); + } + + /** @deprecated unused and will be removed. */ + @Deprecated + protected void insertNode(Node node) { + currentElement().appendChild(node); + onNodeInserted(node); } - void insert(Token.Doctype d) { - DocumentType doctypeNode = new DocumentType(settings.normalizeTag(d.getName()), d.getPublicIdentifier(), d.getSystemIdentifier()); - doctypeNode.setPubSysKey(d.getPubSysKey()); - insertNode(doctypeNode, d); + /** @deprecated unused and will be removed. 
*/ + @Deprecated + protected void insertNode(Node node, Token token) { + currentElement().appendChild(node); + onNodeInserted(node); } /** @@ -158,25 +170,21 @@ protected void popStackToClose(Token.EndTag endTag) { return; // not found, skip for (int pos = stack.size() -1; pos >= 0; pos--) { - Element next = stack.get(pos); - stack.remove(pos); + Element next = pop(); if (next == firstFound) { - onNodeClosed(next, endTag); break; } } } private static final int maxQueueDepth = 256; // an arbitrary tension point between real XML and crafted pain - - List parseFragment(String inputFragment, String baseUri, Parser parser) { initialiseParse(new StringReader(inputFragment), baseUri, parser); runParser(); return doc.childNodes(); } - List parseFragment(String inputFragment, Element context, String baseUri, Parser parser) { + @Override List parseFragment(String inputFragment, Element context, String baseUri, Parser parser) { return parseFragment(inputFragment, baseUri, parser); } } diff --git a/src/main/java/org/jsoup/parser/package-info.java b/src/main/java/org/jsoup/parser/package-info.java index f1b3c88741..35d25a5248 100644 --- a/src/main/java/org/jsoup/parser/package-info.java +++ b/src/main/java/org/jsoup/parser/package-info.java @@ -1,7 +1,7 @@ /** Contains the HTML parser, tag specifications, and HTML tokeniser. 
*/ -@NonnullByDefault +@NullMarked package org.jsoup.parser; -import org.jsoup.internal.NonnullByDefault; +import org.jspecify.annotations.NullMarked; diff --git a/src/main/java/org/jsoup/safety/Cleaner.java b/src/main/java/org/jsoup/safety/Cleaner.java index 7b9317ec29..b84608abac 100644 --- a/src/main/java/org/jsoup/safety/Cleaner.java +++ b/src/main/java/org/jsoup/safety/Cleaner.java @@ -9,15 +9,12 @@ import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.jsoup.parser.ParseErrorList; -import org.jsoup.parser.ParseSettings; import org.jsoup.parser.Parser; -import org.jsoup.parser.Tag; import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeVisitor; import java.util.List; - /** The safelist based HTML cleaner. Use to ensure that end-user provided HTML contains only the elements and attributes that you are expecting; no junk, and no cross-site scripting attacks! @@ -137,7 +134,7 @@ private CleaningVisitor(Element root, Element destination) { this.destination = destination; } - public void head(Node source, int depth) { + @Override public void head(Node source, int depth) { if (source instanceof Element) { Element sourceEl = (Element) source; @@ -155,7 +152,7 @@ public void head(Node source, int depth) { TextNode sourceText = (TextNode) source; TextNode destText = new TextNode(sourceText.getWholeText()); destination.appendChild(destText); - } else if (source instanceof DataNode && safelist.isSafeTag(source.parent().nodeName())) { + } else if (source instanceof DataNode && safelist.isSafeTag(source.parent().normalName())) { DataNode sourceData = (DataNode) source; DataNode destData = new DataNode(sourceData.getWholeData()); destination.appendChild(destData); @@ -164,8 +161,8 @@ public void head(Node source, int depth) { } } - public void tail(Node source, int depth) { - if (source instanceof Element && safelist.isSafeTag(source.nodeName())) { + @Override public void tail(Node source, int depth) { + if (source instanceof Element && 
safelist.isSafeTag(source.normalName())) { destination = destination.parent(); // would have descended, so pop destination stack } } @@ -178,11 +175,12 @@ private int copySafeNodes(Element source, Element dest) { } private ElementMeta createSafeElement(Element sourceEl) { + Element dest = sourceEl.shallowClone(); // reuses tag, clones attributes and preserves any user data String sourceTag = sourceEl.tagName(); - Attributes destAttrs = new Attributes(); - Element dest = new Element(Tag.valueOf(sourceTag, sourceEl.tag().namespace(), ParseSettings.preserveCase), sourceEl.baseUri(), destAttrs); - int numDiscarded = 0; + Attributes destAttrs = dest.attributes(); + dest.clearAttributes(); // clear all non-internal attributes, ready for safe copy + int numDiscarded = 0; Attributes sourceAttrs = sourceEl.attributes(); for (Attribute sourceAttr : sourceAttrs) { if (safelist.isSafeAttribute(sourceTag, sourceEl, sourceAttr)) @@ -192,14 +190,7 @@ private ElementMeta createSafeElement(Element sourceEl) { } Attributes enforcedAttrs = safelist.getEnforcedAttributes(sourceTag); destAttrs.addAll(enforcedAttrs); - - // Copy the original start and end range, if set - // TODO - might be good to make a generic Element#userData set type interface, and copy those all over - if (sourceEl.sourceRange().isTracked()) - sourceEl.sourceRange().track(dest, true); - if (sourceEl.endSourceRange().isTracked()) - sourceEl.endSourceRange().track(dest, false); - + dest.attributes().addAll(destAttrs); // re-attach, if removed in clear return new ElementMeta(dest, numDiscarded); } diff --git a/src/main/java/org/jsoup/safety/Safelist.java b/src/main/java/org/jsoup/safety/Safelist.java index d1f275df39..eb1281ba6d 100644 --- a/src/main/java/org/jsoup/safety/Safelist.java +++ b/src/main/java/org/jsoup/safety/Safelist.java @@ -6,6 +6,7 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/ */ import org.jsoup.helper.Validate; +import org.jsoup.internal.Normalizer; import 
org.jsoup.nodes.Attribute; import org.jsoup.nodes.Attributes; import org.jsoup.nodes.Element; @@ -52,9 +53,8 @@ If you need to allow more through (please be careful!), tweak a base safelist wi

The cleaner and these safelists assume that you want to clean a body fragment of HTML (to add user - supplied HTML into a templated page), and not to clean a full HTML document. If the latter is the case, either wrap the - document HTML around the cleaned body HTML, or create a safelist that allows html and head - elements as appropriate. + supplied HTML into a templated page), and not to clean a full HTML document. If the latter is the case, you could wrap + the templated document HTML around the cleaned body HTML.

If you are going to extend a safelist, please be very careful. Make sure you understand what attributes may lead to @@ -297,8 +297,8 @@ public Safelist addAttributes(String tag, String... attributes) { Validate.notNull(attributes); Validate.isTrue(attributes.length > 0, "No attribute names supplied."); + addTags(tag); TagName tagName = TagName.valueOf(tag); - tagNames.add(tagName); Set attributeSet = new HashSet<>(); for (String key : attributes) { Validate.notEmpty(key); @@ -622,7 +622,7 @@ static class TagName extends TypedValue { } static TagName valueOf(String value) { - return new TagName(value); + return new TagName(Normalizer.lowerCase(value)); } } @@ -632,7 +632,7 @@ static class AttributeKey extends TypedValue { } static AttributeKey valueOf(String value) { - return new AttributeKey(value); + return new AttributeKey(Normalizer.lowerCase(value)); } } diff --git a/src/main/java/org/jsoup/safety/package-info.java b/src/main/java/org/jsoup/safety/package-info.java index 26b4b701bb..11396ebe4d 100644 --- a/src/main/java/org/jsoup/safety/package-info.java +++ b/src/main/java/org/jsoup/safety/package-info.java @@ -1,4 +1,7 @@ /** Contains the jsoup HTML cleaner, and safelist definitions. 
*/ +@NullMarked package org.jsoup.safety; + +import org.jspecify.annotations.NullMarked; \ No newline at end of file diff --git a/src/main/java/org/jsoup/select/Collector.java b/src/main/java/org/jsoup/select/Collector.java index 00f384b6b5..02b0528384 100644 --- a/src/main/java/org/jsoup/select/Collector.java +++ b/src/main/java/org/jsoup/select/Collector.java @@ -1,12 +1,10 @@ package org.jsoup.select; import org.jsoup.nodes.Element; -import org.jsoup.nodes.Node; +import org.jspecify.annotations.Nullable; -import javax.annotation.Nullable; - -import static org.jsoup.select.NodeFilter.FilterResult.CONTINUE; -import static org.jsoup.select.NodeFilter.FilterResult.STOP; +import java.util.Optional; +import java.util.stream.Collectors; /** * Collects a list of elements that match the supplied criteria. @@ -25,15 +23,10 @@ private Collector() {} */ public static Elements collect (Evaluator eval, Element root) { eval.reset(); - Elements elements = new Elements(); - NodeTraversor.traverse((node, depth) -> { - if (node instanceof Element) { - Element el = (Element) node; - if (eval.matches(root, el)) - elements.add(el); - } - }, root); - return elements; + + return root.stream() + .filter(eval.asPredicate(root)) + .collect(Collectors.toCollection(Elements::new)); } /** @@ -45,36 +38,8 @@ public static Elements collect (Evaluator eval, Element root) { */ public static @Nullable Element findFirst(Evaluator eval, Element root) { eval.reset(); - FirstFinder finder = new FirstFinder(eval); - return finder.find(root, root); - } - - static class FirstFinder implements NodeFilter { - private @Nullable Element evalRoot = null; - private @Nullable Element match = null; - private final Evaluator eval; - - FirstFinder(Evaluator eval) { - this.eval = eval; - } - - @Nullable Element find(Element root, Element start) { - evalRoot = root; - match = null; - NodeTraversor.filter(this, start); - return match; - } - @Override - public FilterResult head(Node node, int depth) { - if (node 
instanceof Element) { - Element el = (Element) node; - if (eval.matches(evalRoot, el)) { - match = el; - return STOP; - } - } - return CONTINUE; - } + Optional first = root.stream().filter(eval.asPredicate(root)).findFirst(); + return first.orElse(null); } } diff --git a/src/main/java/org/jsoup/select/CombiningEvaluator.java b/src/main/java/org/jsoup/select/CombiningEvaluator.java index 3af498e68a..25e5eac2ee 100644 --- a/src/main/java/org/jsoup/select/CombiningEvaluator.java +++ b/src/main/java/org/jsoup/select/CombiningEvaluator.java @@ -2,8 +2,8 @@ import org.jsoup.internal.StringUtil; import org.jsoup.nodes.Element; +import org.jspecify.annotations.Nullable; -import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; diff --git a/src/main/java/org/jsoup/select/Elements.java b/src/main/java/org/jsoup/select/Elements.java index 31838e1f89..c344f2e06d 100644 --- a/src/main/java/org/jsoup/select/Elements.java +++ b/src/main/java/org/jsoup/select/Elements.java @@ -8,20 +8,23 @@ import org.jsoup.nodes.FormElement; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; +import org.jspecify.annotations.Nullable; -import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; +import java.util.function.Predicate; +import java.util.function.UnaryOperator; /** A list of {@link Element}s, with methods that act on every element in the list. -

- To get an {@code Elements} object, use the {@link Element#select(String)} method. -

+

To get an {@code Elements} object, use the {@link Element#select(String)} method.

+

Methods that {@link #set(int, Element) set}, {@link #remove(int) remove}, or {@link #replaceAll(UnaryOperator) + replace} Elements in the list will also act on the underlying {@link org.jsoup.nodes.Document DOM}.

@author Jonathan Hedley, jonathan@hedley.net */ public class Elements extends ArrayList { @@ -62,7 +65,7 @@ public Elements clone() { /** Get an attribute value from the first matched element that has the attribute. @param attributeKey The attribute key. - @return The attribute value from the first matched element that has the attribute.. If no elements were matched (isEmpty() == true), + @return The attribute value from the first matched element that has the attribute. If no elements were matched (isEmpty() == true), or if the no elements have the attribute, returns empty string. @see #hasAttr(String) */ @@ -431,6 +434,7 @@ public Elements empty() { /** * Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing. + *

The elements will still be retained in this list, in case further processing of them is desired.

*

* E.g. HTML: {@code

Hello

there

}
* doc.select("p").remove();
@@ -440,6 +444,7 @@ public Elements empty() { * @return this, for chaining * @see Element#empty() * @see #empty() + * @see #clear() */ public Elements remove() { for (Element element : this) { @@ -683,4 +688,121 @@ private List childNodesOfType(Class tClass) { return nodes; } + // list methods that update the DOM: + + /** + Replace the Element at the specified index in this list, and in the DOM. + * @param index index of the element to replace + * @param element element to be stored at the specified position + * @return the old Element at this index + * @since 1.17.1 + */ + @Override public Element set(int index, Element element) { + Validate.notNull(element); + Element old = super.set(index, element); + old.replaceWith(element); + return old; + } + + /** + Remove the Element at the specified index in this list, and from the DOM. + * @param index the index of the element to be removed + * @return the old element at this index + * @since 1.17.1 + */ + @Override public Element remove(int index) { + Element old = super.remove(index); + old.remove(); + return old; + } + + /** + Remove the specified Element from this list, and from the DOM. + * @param o element to be removed from this list, if present + * @return if this list contained the Element + * @since 1.17.1 + */ + @Override public boolean remove(Object o) { + int index = super.indexOf(o); + if (index == -1) { + return false; + } else { + remove(index); + return true; + } + } + + /** + Removes all the elements from this list, and each of them from the DOM. + * @since 1.17.1 + * @see #remove() + */ + @Override public void clear() { + remove(); + super.clear(); + } + + /** + Removes from this list, and from the DOM, each of the elements that are contained in the specified collection and + are in this list. 
+ * @param c collection containing elements to be removed from this list + * @return {@code true} if elements were removed from this list + * @since 1.17.1 + */ + @Override public boolean removeAll(Collection c) { + boolean anyRemoved = false; + for (Object o : c) { + anyRemoved |= this.remove(o); + } + return anyRemoved; + } + + /** + Retain in this list, and in the DOM, only the elements that are in the specified collection and are in this list. + In other words, remove from this list and the DOM any item that is in this list but not in the specified + collection. + * @param c collection containing elements to be retained in this list + * @return {@code true} if elements were removed from this list + * @since 1.17.1 + */ + @Override public boolean retainAll(Collection c) { + boolean anyRemoved = false; + for (Iterator it = this.iterator(); it.hasNext(); ) { + Element el = it.next(); + if (!c.contains(el)) { + it.remove(); + anyRemoved = true; + } + } + return anyRemoved; + } + + /** + Remove from the list, and from the DOM, all elements in this list that match the given filter. + * @param filter a predicate which returns {@code true} for elements to be removed + * @return {@code true} if elements were removed from this list + * @since 1.17.1 + */ + @Override public boolean removeIf(Predicate filter) { + boolean anyRemoved = false; + for (Iterator it = this.iterator(); it.hasNext(); ) { + Element el = it.next(); + if (filter.test(el)) { + it.remove(); + anyRemoved = true; + } + } + return anyRemoved; + } + + /** + Replace each element in this list with the result of the operator, and update the DOM. 
+ * @param operator the operator to apply to each element + * @since 1.17.1 + */ + @Override public void replaceAll(UnaryOperator operator) { + for (int i = 0; i < this.size(); i++) { + this.set(i, operator.apply(this.get(i))); + } + } } diff --git a/src/main/java/org/jsoup/select/Evaluator.java b/src/main/java/org/jsoup/select/Evaluator.java index 65040bcf64..24489691ae 100644 --- a/src/main/java/org/jsoup/select/Evaluator.java +++ b/src/main/java/org/jsoup/select/Evaluator.java @@ -12,6 +12,7 @@ import org.jsoup.parser.ParseSettings; import java.util.List; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -27,6 +28,16 @@ public abstract class Evaluator { protected Evaluator() { } + /** + Provides a Predicate for this Evaluator, matching the test Element. + * @param root the root Element, for match evaluation + * @return a predicate that accepts an Element to test for matches with this Evaluator + * @since 1.17.1 + */ + public Predicate asPredicate(Element root) { + return element -> matches(root, element); + } + /** * Test if the element meets the evaluator's requirements. 
* @@ -327,8 +338,8 @@ public String toString() { * Evaluator for attribute name/value matching (value regex matching) */ public static final class AttributeWithValueMatching extends Evaluator { - String key; - Pattern pattern; + final String key; + final Pattern pattern; public AttributeWithValueMatching(String key, Pattern pattern) { this.key = normalize(key); @@ -355,8 +366,8 @@ public String toString() { * Abstract evaluator for attribute name/value matching */ public abstract static class AttributeKeyPair extends Evaluator { - String key; - String value; + final String key; + final String value; public AttributeKeyPair(String key, String value) { this(key, value, true); @@ -541,12 +552,12 @@ public IsNthChild(int a, int b) { super(a,b); } - protected int calculatePosition(Element root, Element element) { + @Override protected int calculatePosition(Element root, Element element) { return element.elementSiblingIndex()+1; } - protected String getPseudoClass() { + @Override protected String getPseudoClass() { return "nth-child"; } } @@ -583,7 +594,7 @@ public IsNthOfType(int a, int b) { super(a, b); } - protected int calculatePosition(Element root, Element element) { + @Override protected int calculatePosition(Element root, Element element) { Element parent = element.parent(); if (parent == null) return 0; @@ -729,7 +740,7 @@ public String toString() { * @author ant */ public abstract static class IndexEvaluator extends Evaluator { - int index; + final int index; public IndexEvaluator(int index) { this.index = index; diff --git a/src/main/java/org/jsoup/select/NodeTraversor.java b/src/main/java/org/jsoup/select/NodeTraversor.java index 5b01a2f490..de8be092e9 100644 --- a/src/main/java/org/jsoup/select/NodeTraversor.java +++ b/src/main/java/org/jsoup/select/NodeTraversor.java @@ -6,16 +6,19 @@ import org.jsoup.select.NodeFilter.FilterResult; /** - * Depth-first node traversor. Use to iterate through all nodes under and including the specified root node. - *

- * This implementation does not use recursion, so a deep DOM does not risk blowing the stack. - *

+ A depth-first node traversor. Use to walk through all nodes under and including the specified root node, in document + order. The {@link NodeVisitor#head(Node, int)} and {@link NodeVisitor#tail(Node, int)} methods will be called for + each node. +

During traversal, structural changes to nodes are supported (e.g. {@link Node#replaceWith(Node)}, + {@link Node#remove()}). +

*/ public class NodeTraversor { /** - * Start a depth-first traverse of the root and all of its descendants. - * @param visitor Node visitor. - * @param root the root node point to traverse. + Run a depth-first traverse of the root and all of its descendants. + @param visitor Node visitor. + @param root the initial node point to traverse. + @see NodeVisitor */ public static void traverse(NodeVisitor visitor, Node root) { Validate.notNull(visitor); @@ -62,9 +65,9 @@ public static void traverse(NodeVisitor visitor, Node root) { } /** - * Start a depth-first traverse of all elements. - * @param visitor Node visitor. - * @param elements Elements to filter. + Run a depth-first traversal of each Element. + @param visitor Node visitor. + @param elements Elements to traverse. */ public static void traverse(NodeVisitor visitor, Elements elements) { Validate.notNull(visitor); @@ -74,10 +77,12 @@ public static void traverse(NodeVisitor visitor, Elements elements) { } /** - * Start a depth-first filtering of the root and all of its descendants. - * @param filter Node visitor. - * @param root the root node point to traverse. - * @return The filter result of the root node, or {@link FilterResult#STOP}. + Run a depth-first filtered traversal of the root and all of its descendants. + @param filter NodeFilter visitor. + @param root the root node point to traverse. + @return The filter result of the root node, or {@link FilterResult#STOP}. + + @see NodeFilter */ public static FilterResult filter(NodeFilter filter, Node root) { Node node = root; @@ -128,9 +133,9 @@ public static FilterResult filter(NodeFilter filter, Node root) { } /** - * Start a depth-first filtering of all elements. - * @param filter Node filter. - * @param elements Elements to filter. + Run a depth-first filtered traversal of each Element. + @param filter NodeFilter visitor. 
+ @see NodeFilter */ public static void filter(NodeFilter filter, Elements elements) { Validate.notNull(filter); diff --git a/src/main/java/org/jsoup/select/NodeVisitor.java b/src/main/java/org/jsoup/select/NodeVisitor.java index 0dbdc86ce8..6fa0fa7e13 100644 --- a/src/main/java/org/jsoup/select/NodeVisitor.java +++ b/src/main/java/org/jsoup/select/NodeVisitor.java @@ -4,13 +4,26 @@ import org.jsoup.nodes.Node; /** - * Node visitor interface. Provide an implementing class to {@link NodeTraversor} to iterate through nodes. - *

- * This interface provides two methods, {@code head} and {@code tail}. The head method is called when the node is first - * seen, and the tail method when all of the node's children have been visited. As an example, {@code head} can be used to - * emit a start tag for a node, and {@code tail} to create the end tag. - *

+ Node visitor interface. Provide an implementing class to {@link NodeTraversor} or to {@link Node#traverse(NodeVisitor)} + to iterate through nodes. +

+ This interface provides two methods, {@link #head} and {@link #tail}. The head method is called when the node is first + seen, and the tail method when all of the node's children have been visited. As an example, {@code head} can be used to + emit a start tag for a node, and {@code tail} to create the end tag. The {@code tail} method defaults to a no-op, so + the {@code head} method is the {@link FunctionalInterface}. +

+

Example:

+

+ doc.body().traverse((node, depth) -> {
+     switch (node) {
+         case Element el     -> print(el.tag() + ": " + el.ownText());
+         case DataNode data  -> print("Data: " + data.getWholeData());
+         default             -> print(node.nodeName() + " at depth " + depth);
+     }
+ });
+ 
*/ +@FunctionalInterface public interface NodeVisitor { /** Callback for when a node is first visited. diff --git a/src/main/java/org/jsoup/select/QueryParser.java b/src/main/java/org/jsoup/select/QueryParser.java index 09f53bdd00..b495336602 100644 --- a/src/main/java/org/jsoup/select/QueryParser.java +++ b/src/main/java/org/jsoup/select/QueryParser.java @@ -145,18 +145,21 @@ private void combinator(char combinator) { private String consumeSubQuery() { StringBuilder sq = StringUtil.borrowBuilder(); + boolean seenNonCombinator = false; // eat until we hit a combinator after eating something else while (!tq.isEmpty()) { if (tq.matches("(")) sq.append("(").append(tq.chompBalanced('(', ')')).append(")"); else if (tq.matches("[")) sq.append("[").append(tq.chompBalanced('[', ']')).append("]"); else if (tq.matchesAny(Combinators)) - if (sq.length() > 0) + if (seenNonCombinator) break; else - tq.consume(); - else + sq.append(tq.consume()); + else { + seenNonCombinator = true; sq.append(tq.consume()); + } } return StringUtil.releaseBuilder(sq); } @@ -189,6 +192,8 @@ private Evaluator parsePseudoSelector() { return new Evaluator.IndexEquals(consumeIndex()); case "has": return has(); + case "is": + return is(); case "contains": return contains(false); case "containsOwn": @@ -365,6 +370,13 @@ private Evaluator has() { return new StructuralEvaluator.Has(parse(subQuery)); } + // pseudo selector :is() + private Evaluator is() { + String subQuery = consumeParens(); + Validate.notEmpty(subQuery, ":is(selector) sub-select must not be empty"); + return new StructuralEvaluator.Is(parse(subQuery)); + } + // pseudo selector :contains(text), containsOwn(text) private Evaluator contains(boolean own) { String query = own ? 
":containsOwn" : ":contains"; diff --git a/src/main/java/org/jsoup/select/Selector.java b/src/main/java/org/jsoup/select/Selector.java index 95c1756097..c050ff1366 100644 --- a/src/main/java/org/jsoup/select/Selector.java +++ b/src/main/java/org/jsoup/select/Selector.java @@ -2,8 +2,8 @@ import org.jsoup.helper.Validate; import org.jsoup.nodes.Element; +import org.jspecify.annotations.Nullable; -import javax.annotation.Nullable; import java.util.Collection; import java.util.IdentityHashMap; @@ -47,7 +47,8 @@ * :lt(n)elements whose sibling index is less than ntd:lt(3) finds the first 3 cells of each row * :gt(n)elements whose sibling index is greater than ntd:gt(1) finds cells after skipping the first two * :eq(n)elements whose sibling index is equal to ntd:eq(0) finds the first cell of each row - * :has(selector)elements that contains at least one element matching the selectordiv:has(p) finds divs that contain p elements.
div:has(> a) selects div elements that have at least one direct child a element. + * :has(selector)elements that contain at least one element matching the selectordiv:has(p) finds divs that contain p elements.
div:has(> a) selects div elements that have at least one direct child a element.
section:has(h1, h2) finds section elements that contain a h1 or a h2 element + * :is(selector list)elements that match any of the selectors in the selector list:is(h1, h2, h3, h4, h5, h6) finds any heading element.
:is(section, article) > :is(h1, h2) finds a h1 or h2 that is a direct child of a section or an article * :not(selector)elements that do not match the selector. See also {@link Elements#not(String)}div:not(.logo) finds all divs that do not have the "logo" class.

div:not(:has(div)) finds divs that do not contain divs.

* :contains(text)elements that contains the specified text. The search is case insensitive. The text may appear in the found element, or any of its descendants. The text is whitespace normalized.

To find content that includes parentheses, escape those with a {@code \}.

p:contains(jsoup) finds p elements containing the text "jsoup".

{@code p:contains(hello \(there\) finds p elements containing the text "Hello (There)"}

* :containsOwn(text)elements that directly contain the specified text. The search is case insensitive. The text must appear in the found element, not any of its descendants.p:containsOwn(jsoup) finds p elements with own text "jsoup". diff --git a/src/main/java/org/jsoup/select/StructuralEvaluator.java b/src/main/java/org/jsoup/select/StructuralEvaluator.java index 96ff252e5b..1e84427068 100644 --- a/src/main/java/org/jsoup/select/StructuralEvaluator.java +++ b/src/main/java/org/jsoup/select/StructuralEvaluator.java @@ -2,7 +2,7 @@ import org.jsoup.internal.StringUtil; import org.jsoup.nodes.Element; -import org.jsoup.nodes.Node; +import org.jsoup.nodes.NodeIterator; import java.util.ArrayList; import java.util.IdentityHashMap; @@ -59,23 +59,21 @@ public boolean matches(Element root, Element element) { } static class Has extends StructuralEvaluator { - final Collector.FirstFinder finder; + final NodeIterator it = new NodeIterator<>(new Element("html"), Element.class); + // the element here is just a placeholder so this can be final - gets set in restart() public Has(Evaluator evaluator) { super(evaluator); - finder = new Collector.FirstFinder(evaluator); } - @Override - public boolean matches(Element root, Element element) { + @Override public boolean matches(Element root, Element element) { // for :has, we only want to match children (or below), not the input element. 
And we want to minimize GCs - for (int i = 0; i < element.childNodeSize(); i++) { - Node node = element.childNode(i); - if (node instanceof Element) { - Element match = finder.find(element, (Element) node); - if (match != null) - return true; - } + it.restart(element); + while (it.hasNext()) { + Element el = it.next(); + if (el == element) continue; // don't match self, only descendants + if (evaluator.matches(element, el)) + return true; } return false; } @@ -90,6 +88,27 @@ public String toString() { } } + /** Implements the :is(sub-query) pseudo-selector */ + static class Is extends StructuralEvaluator { + public Is(Evaluator evaluator) { + super(evaluator); + } + + @Override + public boolean matches(Element root, Element element) { + return evaluator.matches(root, element); + } + + @Override protected int cost() { + return 2 + evaluator.cost(); + } + + @Override + public String toString() { + return String.format(":is(%s)", evaluator); + } + } + static class Not extends StructuralEvaluator { public Not(Evaluator evaluator) { super(evaluator); @@ -189,7 +208,9 @@ void add(Evaluator evaluator) { @Override public boolean matches(Element root, Element element) { - // evaluate from last to first + if (element == root) + return false; // cannot match as the second eval (first parent test) would be above the root + for (int i = evaluators.size() -1; i >= 0; --i) { if (element == null) return false; diff --git a/src/main/java/org/jsoup/select/package-info.java b/src/main/java/org/jsoup/select/package-info.java index 5bbdb85543..34d3388aad 100644 --- a/src/main/java/org/jsoup/select/package-info.java +++ b/src/main/java/org/jsoup/select/package-info.java @@ -2,7 +2,7 @@ Packages to support the CSS-style element selector. 
{@link org.jsoup.select.Selector Selector defines the query syntax.} */ -@NonnullByDefault +@NullMarked package org.jsoup.select; -import org.jsoup.internal.NonnullByDefault; +import org.jspecify.annotations.NullMarked; diff --git a/src/main/java9/module-info.java b/src/main/java9/module-info.java new file mode 100644 index 0000000000..31bd333877 --- /dev/null +++ b/src/main/java9/module-info.java @@ -0,0 +1,11 @@ +module org.jsoup { + exports org.jsoup; + exports org.jsoup.helper; + exports org.jsoup.nodes; + exports org.jsoup.parser; + exports org.jsoup.safety; + exports org.jsoup.select; + + requires transitive java.xml; // for org.w3c.dom out of W3CDom + requires static org.jspecify; // nullability annotations +} diff --git a/src/main/java9/org/jsoup/helper/RequestAuthHandler.java b/src/main/java9/org/jsoup/helper/RequestAuthHandler.java new file mode 100644 index 0000000000..0df80de209 --- /dev/null +++ b/src/main/java9/org/jsoup/helper/RequestAuthHandler.java @@ -0,0 +1,24 @@ +package org.jsoup.helper; + +import java.net.HttpURLConnection; + +/** + A per-request authentication shim, used in Java 9+. 
+ */ +class RequestAuthHandler implements AuthenticationHandler.AuthShim { + public RequestAuthHandler() {} + + @Override public void enable(RequestAuthenticator auth, HttpURLConnection con) { + AuthenticationHandler authenticator = new AuthenticationHandler(auth); + con.setAuthenticator(authenticator); + } + + @Override public void remove() { + // noop; would remove thread-local in Global Handler + } + + @Override public AuthenticationHandler get(AuthenticationHandler helper) { + // would get thread-local in Global Handler + return helper; + } +} diff --git a/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java b/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java new file mode 100644 index 0000000000..c0dd692d37 --- /dev/null +++ b/src/test/java/org/jsoup/helper/AuthenticationHandlerTest.java @@ -0,0 +1,7 @@ +package org.jsoup.helper; + +public class AuthenticationHandlerTest { + public static final int MaxAttempts = AuthenticationHandler.MaxAttempts; + + // tests are in ConnectionTest, ProxyTest. This class just makes the MaxAttempts visible for test. 
+} diff --git a/src/test/java/org/jsoup/helper/HttpConnectionTest.java b/src/test/java/org/jsoup/helper/HttpConnectionTest.java index 8df0f80397..5757d1d27a 100644 --- a/src/test/java/org/jsoup/helper/HttpConnectionTest.java +++ b/src/test/java/org/jsoup/helper/HttpConnectionTest.java @@ -7,7 +7,9 @@ import org.junit.jupiter.api.Test; import java.io.IOException; +import java.net.Authenticator; import java.net.MalformedURLException; +import java.net.PasswordAuthentication; import java.net.URL; import java.util.ArrayList; import java.util.Collection; @@ -373,4 +375,37 @@ public void caseInsensitiveHeaders(Locale locale) { String actual = connect.request().header("Key"); assertEquals(value, actual); } + + @Test void setAuth() throws MalformedURLException { + Connection con = Jsoup.newSession(); + + assertNull(con.request().auth()); + + RequestAuthenticator auth1 = new RequestAuthenticator() { + @Override public PasswordAuthentication authenticate(Context auth) { + return auth.credentials("foo", "bar"); + } + }; + + RequestAuthenticator auth2 = new RequestAuthenticator() { + @Override public PasswordAuthentication authenticate(Context auth) { + return auth.credentials("qux", "baz"); + } + }; + + con.auth(auth1); + assertSame(con.request().auth(), auth1); + + con.auth(auth2); + assertSame(con.request().auth(), auth2); + + con.request().auth(auth1); + assertSame(con.request().auth(), auth1); + + PasswordAuthentication creds = auth1.authenticate( + new RequestAuthenticator.Context(new URL("http://example.com"), Authenticator.RequestorType.SERVER, "Realm")); + assertNotNull(creds); + assertEquals("foo", creds.getUserName()); + assertEquals("bar", new String(creds.getPassword())); + } } diff --git a/src/test/java/org/jsoup/helper/W3CDomTest.java b/src/test/java/org/jsoup/helper/W3CDomTest.java index c1daeb57b0..fe8379aea8 100644 --- a/src/test/java/org/jsoup/helper/W3CDomTest.java +++ b/src/test/java/org/jsoup/helper/W3CDomTest.java @@ -345,5 +345,26 @@ public void 
canOutputHtmlWithoutNamespace() { org.jsoup.nodes.TextNode jText = (TextNode) jDiv.childNode(0).childNode(0); assertEquals(jText, textNode.getUserData(W3CDom.SourceProperty)); } + + @Test public void canXmlParseCdataNodes() throws XPathExpressionException { + String html = "

5 && 6

"; + org.jsoup.nodes.Document jdoc = Jsoup.parse(html); + jdoc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml); + String xml = jdoc.body().html(); + assertTrue(xml.contains(" 5 && 6

"; + Document doc = Jsoup.parse(html); // parsed as HTML + String out = TextUtil.normalizeSpaces(doc.body().html()); + assertEquals(html, out); + Element scriptEl = doc.expectFirst("script"); + DataNode scriptDataNode = (DataNode) scriptEl.childNode(0); + assertEquals("1 && 2", scriptDataNode.getWholeData()); + + doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); + String xml = doc.body().html(); + assertEquals( + "

5 && 6

", + TextUtil.normalizeSpaces(xml)); + + Document xmlDoc = Jsoup.parse(xml, Parser.xmlParser()); + assertEquals(xml, xmlDoc.html()); + Element scriptXmlEl = xmlDoc.expectFirst("script"); + CDataNode scriptCdata = (CDataNode) scriptXmlEl.childNode(0); + assertEquals(scriptCdata.text(), scriptDataNode.getWholeData()); + } + @Test void outerHtmlAppendable() { // tests not string builder flow Document doc = Jsoup.parse("
One
"); @@ -2816,4 +2850,19 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) { assertEquals("

One

", header.html()); } + + @Test void xmlSyntaxSetsEscapeMode() { + String html = "Foo ≻"; + Document doc = Jsoup.parse(html); + doc.outputSettings().charset("ascii"); // so we can see the zws + assertEquals("Foo ≻", doc.body().html()); + + doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); + String out = doc.body().html(); + assertEquals("Foo ≻", out); + + // can set back if desired + doc.outputSettings().escapeMode(Entities.EscapeMode.extended); + assertEquals("Foo ≻", doc.body().html()); // succ is alias for Succeeds, and first hit in entities + } } diff --git a/src/test/java/org/jsoup/nodes/NodeIteratorTest.java b/src/test/java/org/jsoup/nodes/NodeIteratorTest.java new file mode 100644 index 0000000000..ab7e9345e5 --- /dev/null +++ b/src/test/java/org/jsoup/nodes/NodeIteratorTest.java @@ -0,0 +1,266 @@ +package org.jsoup.nodes; + +import org.jsoup.Jsoup; +import org.junit.jupiter.api.Test; + +import java.util.NoSuchElementException; + +import static org.junit.jupiter.api.Assertions.*; + +class NodeIteratorTest { + String html = "

One

Two

Three

Four

"; + + @Test void canIterateNodes() { + Document doc = Jsoup.parse(html); + NodeIterator it = NodeIterator.from(doc); + assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); + // todo - need to review that the Document object #root holds the html element as child. Why not have document root == html element? + assertFalse(it.hasNext()); + + boolean threw = false; + try { + it.next(); + } catch (NoSuchElementException e) { + threw = true; + } + assertTrue(threw); + } + + @Test void hasNextIsPure() { + Document doc = Jsoup.parse(html); + NodeIterator it = NodeIterator.from(doc); + assertTrue(it.hasNext()); + assertTrue(it.hasNext()); + assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); + assertFalse(it.hasNext()); + } + + @Test void iterateSubTree() { + Document doc = Jsoup.parse(html); + + Element div1 = doc.expectFirst("div#1"); + NodeIterator it = NodeIterator.from(div1); + assertIterates(it, "div#1;p;One;p;Two;"); + assertFalse(it.hasNext()); + + Element div2 = doc.expectFirst("div#2"); + NodeIterator it2 = NodeIterator.from(div2); + assertIterates(it2, "div#2;p;Three;p;Four;"); + assertFalse(it2.hasNext()); + } + + @Test void canRestart() { + Document doc = Jsoup.parse(html); + + NodeIterator it = NodeIterator.from(doc); + assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); + + it.restart(doc.expectFirst("div#2")); + assertIterates(it, "div#2;p;Three;p;Four;"); + } + + @Test void canIterateJustOneSibling() { + Document doc = Jsoup.parse(html); + Element p2 = doc.expectFirst("p:contains(Two)"); + assertEquals("Two", p2.text()); + + NodeIterator it = NodeIterator.from(p2); + assertIterates(it, "p;Two;"); + + NodeIterator elIt = new NodeIterator<>(p2, Element.class); + Element found = elIt.next(); + assertSame(p2, found); + assertFalse(elIt.hasNext()); + } + + @Test void canIterateFirstEmptySibling() { + Document doc = Jsoup.parse("

.

..

"); + Element p1 = doc.expectFirst("p#1"); + assertEquals("", p1.ownText()); + + NodeIterator it = NodeIterator.from(p1); + assertTrue(it.hasNext()); + Node node = it.next(); + assertSame(p1, node); + assertFalse(it.hasNext()); + } + + @Test void canRemoveViaIterator() { + String html = "

One

Two

Three

Four

Out2"; + Document doc = Jsoup.parse(html); + + NodeIterator it = NodeIterator.from(doc); + StringBuilder seen = new StringBuilder(); + while (it.hasNext()) { + Node node = it.next(); + if (node.attr("id").equals("1")) + it.remove(); + trackSeen(node, seen); + } + assertEquals("#root;html;head;body;div#out1;div#1;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString()); + assertContents(doc, "#root;html;head;body;div#out1;div#2;p;Three;p;Four;div#out2;Out2;"); + + it = NodeIterator.from(doc); + seen = new StringBuilder(); + while (it.hasNext()) { + Node node = it.next(); + if (node.attr("id").equals("2")) + it.remove(); + trackSeen(node, seen); + } + assertEquals("#root;html;head;body;div#out1;div#2;div#out2;Out2;", seen.toString()); + assertContents(doc, "#root;html;head;body;div#out1;div#out2;Out2;"); + } + + @Test void canRemoveViaNode() { + String html = "

One

Two

Three

Four

Out2"; + Document doc = Jsoup.parse(html); + + NodeIterator it = NodeIterator.from(doc); + StringBuilder seen = new StringBuilder(); + while (it.hasNext()) { + Node node = it.next(); + if (node.attr("id").equals("1")) + node.remove(); + trackSeen(node, seen); + } + assertEquals("#root;html;head;body;div#out1;div#1;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString()); + assertContents(doc, "#root;html;head;body;div#out1;div#2;p;Three;p;Four;div#out2;Out2;"); + + it = NodeIterator.from(doc); + seen = new StringBuilder(); + while (it.hasNext()) { + Node node = it.next(); + if (node.attr("id").equals("2")) + node.remove(); + trackSeen(node, seen); + } + assertEquals("#root;html;head;body;div#out1;div#2;div#out2;Out2;", seen.toString()); + assertContents(doc, "#root;html;head;body;div#out1;div#out2;Out2;"); + } + + @Test void canReplace() { + String html = "

One

Two

Three

Four

Out2"; + Document doc = Jsoup.parse(html); + + NodeIterator it = NodeIterator.from(doc); + StringBuilder seen = new StringBuilder(); + while (it.hasNext()) { + Node node = it.next(); + trackSeen(node, seen); + if (node.attr("id").equals("1")) { + node.replaceWith(new Element("span").text("Foo")); + } + } + assertEquals("#root;html;head;body;div#out1;div#1;span;Foo;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString()); + // ^^ we don't see

One, do see the replaced in , and the subsequent nodes + assertContents(doc, "#root;html;head;body;div#out1;span;Foo;div#2;p;Three;p;Four;div#out2;Out2;"); + + it = NodeIterator.from(doc); + seen = new StringBuilder(); + while (it.hasNext()) { + Node node = it.next(); + trackSeen(node, seen); + if (node.attr("id").equals("2")) { + node.replaceWith(new Element("span").text("Bar")); + } + } + assertEquals("#root;html;head;body;div#out1;span;Foo;div#2;span;Bar;div#out2;Out2;", seen.toString()); + assertContents(doc, "#root;html;head;body;div#out1;span;Foo;span;Bar;div#out2;Out2;"); + } + + @Test void canWrap() { + Document doc = Jsoup.parse(html); + NodeIterator it = NodeIterator.from(doc); + boolean sawInner = false; + while (it.hasNext()) { + Node node = it.next(); + if (node.attr("id").equals("1")) { + node.wrap("

"); + } + if (node instanceof TextNode && ((TextNode) node).text().equals("One")) + sawInner = true; + } + assertContents(doc, "#root;html;head;body;div#outer;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); + assertTrue(sawInner); + } + + @Test void canFilterForElements() { + Document doc = Jsoup.parse(html); + NodeIterator it = new NodeIterator<>(doc, Element.class); + + StringBuilder seen = new StringBuilder(); + while (it.hasNext()) { + Element el = it.next(); + assertNotNull(el); + trackSeen(el, seen); + } + + assertEquals("#root;html;head;body;div#1;p;p;div#2;p;p;", seen.toString()); + } + + @Test void canFilterForTextNodes() { + Document doc = Jsoup.parse(html); + NodeIterator it = new NodeIterator<>(doc, TextNode.class); + + StringBuilder seen = new StringBuilder(); + while (it.hasNext()) { + TextNode text = it.next(); + assertNotNull(text); + trackSeen(text, seen); + } + + assertEquals("One;Two;Three;Four;", seen.toString()); + assertContents(doc, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); + } + + @Test void canModifyFilteredElements() { + Document doc = Jsoup.parse(html); + NodeIterator it = new NodeIterator<>(doc, Element.class); + + StringBuilder seen = new StringBuilder(); + while (it.hasNext()) { + Element el = it.next(); + if (!el.ownText().isEmpty()) + el.text(el.ownText() + "++"); + trackSeen(el, seen); + } + + assertEquals("#root;html;head;body;div#1;p;p;div#2;p;p;", seen.toString()); + assertContents(doc, "#root;html;head;body;div#1;p;One++;p;Two++;div#2;p;Three++;p;Four++;"); + } + + static void assertIterates(NodeIterator it, String expected) { + Node previous = null; + StringBuilder actual = new StringBuilder(); + while (it.hasNext()) { + Node node = it.next(); + assertNotNull(node); + assertNotSame(previous, node); + + trackSeen(node, actual); + previous = node; + } + assertEquals(expected, actual.toString()); + } + + static void assertContents(Element el, String expected) { + NodeIterator it = NodeIterator.from(el); + 
assertIterates(it, expected); + } + + static void trackSeen(Node node, StringBuilder actual) { + if (node instanceof Element) { + Element el = (Element) node; + actual.append(el.tagName()); + if (el.hasAttr("id")) + actual.append("#").append(el.id()); + } + else if (node instanceof TextNode) + actual.append(((TextNode) node).text()); + else + actual.append(node.nodeName()); + actual.append(";"); + } + +} \ No newline at end of file diff --git a/src/test/java/org/jsoup/nodes/NodeStreamTest.java b/src/test/java/org/jsoup/nodes/NodeStreamTest.java new file mode 100644 index 0000000000..b8aadaf83f --- /dev/null +++ b/src/test/java/org/jsoup/nodes/NodeStreamTest.java @@ -0,0 +1,70 @@ +package org.jsoup.nodes; + +import org.jsoup.Jsoup; +import org.junit.jupiter.api.Test; + +import java.util.Optional; +import java.util.stream.Stream; + +import static org.jsoup.nodes.NodeIteratorTest.trackSeen; +import static org.jsoup.nodes.NodeIteratorTest.assertContents; +import static org.junit.jupiter.api.Assertions.*; + +public class NodeStreamTest { + + String html = "

One

Two

Three

Four

"; + + + @Test void canStream() { + Document doc = Jsoup.parse(html); + StringBuilder seen = new StringBuilder(); + Stream stream = doc.nodeStream(); + stream.forEachOrdered(node -> trackSeen(node, seen)); + assertEquals("#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;", seen.toString()); + } + + @Test void canStreamParallel() { + Document doc = Jsoup.parse(html); + long count = doc.nodeStream().parallel().count(); + assertEquals(14, count); + } + + @Test void canFindFirst() { + Document doc = Jsoup.parse(html); + Optional first = doc.nodeStream().findFirst(); + assertTrue(first.isPresent()); + assertSame(doc, first.get()); + } + + @Test void canFilter() { + Document doc = Jsoup.parse(html); + StringBuilder seen = new StringBuilder(); + + doc.nodeStream() + .filter(node -> node instanceof TextNode) + .forEach(node -> trackSeen(node, seen)); + + assertEquals("One;Two;Three;Four;", seen.toString()); + } + + @Test void canRemove() { + String html = "

One

Two

Three

Four

Five"; + Document doc = Jsoup.parse(html); + + doc.nodeStream() + .filter(node -> node instanceof Element) + .filter(node -> node.attr("id").equals("1") || node.attr("id").equals("2")) + .forEach(Node::remove); + + assertContents(doc, "#root;html;head;body;div#3;p;Five;"); + } + + @Test void elementStream() { + Document doc = Jsoup.parse(html); + StringBuilder seen = new StringBuilder(); + Stream stream = doc.stream(); + stream.forEachOrdered(node -> trackSeen(node, seen)); + assertEquals("#root;html;head;body;div#1;p;p;div#2;p;p;", seen.toString()); + } + +} diff --git a/src/test/java/org/jsoup/nodes/PositionTest.java b/src/test/java/org/jsoup/nodes/PositionTest.java index 1db73c9e97..ee6f49d79f 100644 --- a/src/test/java/org/jsoup/nodes/PositionTest.java +++ b/src/test/java/org/jsoup/nodes/PositionTest.java @@ -4,12 +4,11 @@ import org.jsoup.integration.servlets.FileServlet; import org.jsoup.parser.Parser; import org.jsoup.select.Elements; -import org.jsoup.select.NodeTraversor; import org.junit.jupiter.api.Test; import java.io.IOException; -import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.*; @@ -32,9 +31,10 @@ class PositionTest { } @Test void tracksPosition() { - String html = "

\nHello\n ®\n there ©. now.\n "; - Document doc = Jsoup.parse(html, TrackingParser); + String content = "

\nHello\n ®\n there ©. now.\n "; + Document doc = Jsoup.parse(content, TrackingParser); + Element html = doc.expectFirst("html"); Element body = doc.expectFirst("body"); Element p = doc.expectFirst("p"); Element span = doc.expectFirst("span"); @@ -45,14 +45,28 @@ class PositionTest { Comment comment = (Comment) now.nextSibling(); assertNotNull(comment); - assertFalse(body.sourceRange().isTracked()); + // implicit + assertTrue(body.sourceRange().isTracked()); + assertTrue(body.endSourceRange().isTracked()); + assertTrue(body.sourceRange().isImplicit()); + assertTrue(body.endSourceRange().isImplicit()); + Range htmlRange = html.sourceRange(); + assertEquals("1,1:0-1,1:0", htmlRange.toString()); + assertEquals(htmlRange, body.sourceRange()); + assertEquals(html.endSourceRange(), body.endSourceRange()); + Range pRange = p.sourceRange(); assertEquals("1,1:0-2,12:19", pRange.toString()); + assertFalse(pRange.isImplicit()); + assertTrue(p.endSourceRange().isImplicit()); + assertEquals("6,19:83-6,19:83", p.endSourceRange().toString()); + assertEquals(p.endSourceRange(), html.endSourceRange()); // no explicit P closer Range pEndRange = p.endSourceRange(); - assertFalse(pEndRange.isTracked()); + assertTrue(pEndRange.isTracked()); + assertTrue(pEndRange.isImplicit()); Range.Position pStart = pRange.start(); assertTrue(pStart.isTracked()); @@ -88,6 +102,76 @@ class PositionTest { assertEquals("6,18:82", comment.sourceRange().end().toString()); } + @Test void tracksExpectedPoppedElements() { + // When TreeBuilder hits a direct .pop(), vs popToClose(..) + String html = "

One

Two

"; + Document doc = Jsoup.parse(html, TrackingParser); + + StringBuilder track = new StringBuilder(); + doc.expectFirst("html").stream().forEach(el -> { + accumulatePositions(el, track); + assertTrue(el.sourceRange().isTracked(), el.tagName()); + assertTrue(el.endSourceRange().isTracked(), el.tagName()); + assertFalse(el.sourceRange().isImplicit(), el.tagName()); + assertFalse(el.endSourceRange().isImplicit(), el.tagName()); + }); + assertEquals("html:0-6~63-70; head:6-12~18-25; meta:12-18~12-18; body:25-31~56-63; img:31-36~31-36; p:36-39~42-46; p:46-49~52-56; ", track.toString()); + + StringBuilder textTrack = new StringBuilder(); + doc.nodeStream(TextNode.class).forEach(text -> accumulatePositions(text, textTrack)); + assertEquals("#text:39-42; #text:49-52; ", textTrack.toString()); + } + + static void accumulatePositions(Node node, StringBuilder sb) { + sb + .append(node.nodeName()) + .append(':') + .append(node.sourceRange().startPos()) + .append('-') + .append(node.sourceRange().endPos()); + + if (node instanceof Element) { + Element el = (Element) node; + sb + .append("~") + .append(el.endSourceRange().startPos()) + .append('-') + .append(el.endSourceRange().endPos()); + } + sb.append("; "); + } + + @Test void tracksImplicitPoppedElements() { + // When TreeBuilder hits a direct .pop(), vs popToClose(..) + String html = "

One

Two

Three"; + Document doc = Jsoup.parse(html, TrackingParser); + + StringBuilder track = new StringBuilder(); + doc.expectFirst("html").stream().forEach(el -> { + assertTrue(el.sourceRange().isTracked()); + assertTrue(el.endSourceRange().isTracked()); + accumulatePositions(el, track); + }); + + assertTrue(doc.expectFirst("p").endSourceRange().isImplicit()); + assertFalse(doc.expectFirst("meta").endSourceRange().isImplicit()); + assertEquals("html:0-0~31-31; head:0-0~6-6; meta:0-6~0-6; body:6-6~31-31; img:6-11~6-11; p:11-14~17-17; p:17-20~23-23; p:23-26~31-31; ", track.toString()); + } + private void printRange(Node node) { + if (node instanceof Element) { + Element el = (Element) node; + System.out.println(el.tagName() + "\t" + + el.sourceRange().start().pos() + "-" + el.sourceRange().end().pos() + + "\t... " + + el.endSourceRange().start().pos() + "-" + el.endSourceRange().end().pos() + ); + } else { + System.out.println(node.nodeName() + "\t" + + node.sourceRange().start().pos() + "-" + node.sourceRange().end().pos() + ); + } + } + @Test void tracksMarkup() { String html = "\njsoup ©\n2022\n\n]]>"; Document doc = Jsoup.parse(html, TrackingParser); @@ -177,19 +261,19 @@ class PositionTest { String html = "foobarquxcoo
baz
"; Document doc = Jsoup.parse(html, TrackingParser); - List textNodes = new ArrayList<>(); - NodeTraversor.traverse((Node node, int depth) -> { - if (node instanceof TextNode) { - textNodes.add((TextNode) node); - } - }, doc); + StringBuilder track = new StringBuilder(); + List textNodes = doc.nodeStream(TextNode.class) + .peek(node -> accumulatePositions(node, track)) + .collect(Collectors.toList()); assertEquals(5, textNodes.size()); - assertEquals("1,8:7-1,11:10", textNodes.get(0).sourceRange().toString()); - assertEquals("1,15:14-1,18:17", textNodes.get(1).sourceRange().toString()); - assertEquals("1,22:21-1,25:24", textNodes.get(2).sourceRange().toString()); - assertEquals("1,30:29-1,33:32", textNodes.get(3).sourceRange().toString()); - assertEquals("1,38:37-1,41:40", textNodes.get(4).sourceRange().toString()); + assertEquals("foo", textNodes.get(0).text()); + assertEquals("bar", textNodes.get(1).text()); + assertEquals("baz", textNodes.get(2).text()); + assertEquals("qux", textNodes.get(3).text()); + assertEquals("coo", textNodes.get(4).text()); + + assertEquals("#text:7-10; #text:14-17; #text:21-24; #text:29-32; #text:37-40; ", track.toString()); } @Test void tracksClosingHtmlTagsInXml() { @@ -219,4 +303,76 @@ class PositionTest { assertEquals("1,20:19-1,25:24", h2.endSourceRange().toString()); } + @Test void tracksAttributes() { + String html = "

Text"; + Document doc = Jsoup.parse(html, TrackingParser); + + Element div = doc.expectFirst("div"); + + StringBuilder track = new StringBuilder(); + for (Attribute attr : div.attributes()) { + + Range.AttributeRange attrRange = attr.sourceRange(); + assertTrue(attrRange.nameRange().isTracked()); + assertTrue(attrRange.valueRange().isTracked()); + assertSame(attrRange, div.attributes().sourceRange(attr.getKey())); + + assertFalse(attrRange.nameRange().isImplicit()); + if (attr.getValue().isEmpty()) + assertTrue(attrRange.valueRange().isImplicit()); + else + assertFalse(attrRange.valueRange().isImplicit()); + + accumulatePositions(attr, track); + } + + System.out.println(track); + assertEquals("one:5-8=10-21; id:23-25=26-27; class:28-33=34-37; attr1:38-43=47-60; attr2:62-67=69-78; attr3:80-85=85-85; attr4:89-94=94-94; attr5:95-100=100-100; ", track.toString()); + } + + @Test void tracksAttributesAcrossLines() { + String html = "
Text"; + Document doc = Jsoup.parse(html, TrackingParser); + + Element div = doc.expectFirst("div"); + + StringBuilder track = new StringBuilder(); + for (Attribute attr : div.attributes()) { + Range.AttributeRange attrRange = attr.sourceRange(); + assertTrue(attrRange.nameRange().isTracked()); + assertTrue(attrRange.valueRange().isTracked()); + assertSame(attrRange, div.attributes().sourceRange(attr.getKey())); + assertFalse(attrRange.nameRange().isImplicit()); + if (attr.getValue().isEmpty()) + assertTrue(attrRange.valueRange().isImplicit()); + else + assertFalse(attrRange.valueRange().isImplicit()); + accumulatePositions(attr, track); + } + + String value = div.attributes().get("class"); + assertEquals("foo", value); + Range.AttributeRange foo = div.attributes().sourceRange("class"); + assertEquals("4,1:30-4,6:35=5,1:37-5,4:40", foo.toString()); + + assertEquals("one:5-8=10-21; id:24-26=27-28; class:30-35=37-40; attr5:41-46=46-46; ", track.toString()); + } + + static void accumulatePositions(Attribute attr, StringBuilder sb) { + Range.AttributeRange range = attr.sourceRange(); + + sb + .append(attr.getKey()) + .append(':') + .append(range.nameRange().startPos()) + .append('-') + .append(range.nameRange().endPos()) + + .append('=') + .append(range.valueRange().startPos()) + .append('-') + .append(range.valueRange().endPos()); + + sb.append("; "); + } } \ No newline at end of file diff --git a/src/test/java/org/jsoup/parser/CharacterReaderTest.java b/src/test/java/org/jsoup/parser/CharacterReaderTest.java index 5aa9a58e74..7071bfe51d 100644 --- a/src/test/java/org/jsoup/parser/CharacterReaderTest.java +++ b/src/test/java/org/jsoup/parser/CharacterReaderTest.java @@ -433,14 +433,14 @@ public void notEmptyAtBufferSplitPoint() { assertEquals(12, noTrack.pos()); assertEquals(1, noTrack.lineNumber()); assertEquals(13, noTrack.columnNumber()); - assertEquals("1:13", noTrack.cursorPos()); + assertEquals("1:13", noTrack.posLineCol()); // get over the buffer while 
(!noTrack.matches("[foo]")) noTrack.consumeTo("[foo]"); assertEquals(32778, noTrack.pos()); assertEquals(1, noTrack.lineNumber()); assertEquals(noTrack.pos()+1, noTrack.columnNumber()); - assertEquals("1:32779", noTrack.cursorPos()); + assertEquals("1:32779", noTrack.posLineCol()); // and the line numbers: "\n\n\n" assertEquals(0, track.pos()); @@ -462,24 +462,24 @@ public void notEmptyAtBufferSplitPoint() { assertEquals(12, track.pos()); assertEquals(3, track.lineNumber()); assertEquals(1, track.columnNumber()); - assertEquals("3:1", track.cursorPos()); + assertEquals("3:1", track.posLineCol()); assertEquals("", track.consumeTo('\n')); - assertEquals("3:6", track.cursorPos()); + assertEquals("3:6", track.posLineCol()); // get over the buffer while (!track.matches("[foo]")) track.consumeTo("[foo]"); assertEquals(32778, track.pos()); assertEquals(4, track.lineNumber()); assertEquals(32761, track.columnNumber()); - assertEquals("4:32761", track.cursorPos()); + assertEquals("4:32761", track.posLineCol()); track.consumeTo('\n'); - assertEquals("4:32766", track.cursorPos()); + assertEquals("4:32766", track.posLineCol()); track.consumeTo("[bar]"); assertEquals(5, track.lineNumber()); - assertEquals("5:1", track.cursorPos()); + assertEquals("5:1", track.posLineCol()); track.consumeToEnd(); - assertEquals("5:6", track.cursorPos()); + assertEquals("5:6", track.posLineCol()); } @Test public void countsColumnsOverBufferWhenNoNewlines() { @@ -490,7 +490,7 @@ public void notEmptyAtBufferSplitPoint() { CharacterReader reader = new CharacterReader(content); reader.trackNewlines(true); - assertEquals("1:1", reader.cursorPos()); + assertEquals("1:1", reader.posLineCol()); while (!reader.isEmpty()) reader.consume(); assertEquals(131096, reader.pos()); @@ -515,4 +515,32 @@ public void notEmptyAtBufferSplitPoint() { assertEquals(14, reader.columnNumber()); } + @Test public void consumeDoubleQuotedAttributeConsumesThruSingleQuote() { + String html = "He'llo\" >"; + CharacterReader r = 
new CharacterReader(html); + assertEquals("He'llo", r.consumeAttributeQuoted(false)); + assertEquals('"', r.consume()); + } + + @Test public void consumeSingleQuotedAttributeConsumesThruDoubleQuote() { + String html = "He\"llo' >"; + CharacterReader r = new CharacterReader(html); + assertEquals("He\"llo", r.consumeAttributeQuoted(true)); + assertEquals('\'', r.consume()); + } + + @Test public void consumeDoubleQuotedAttributeConsumesThruSingleQuoteToAmp() { + String html = "He'llo ©\" >"; + CharacterReader r = new CharacterReader(html); + assertEquals("He'llo ", r.consumeAttributeQuoted(false)); + assertEquals('&', r.consume()); + } + + @Test public void consumeSingleQuotedAttributeConsumesThruDoubleQuoteToAmp() { + String html = "He\"llo ©' >"; + CharacterReader r = new CharacterReader(html); + assertEquals("He\"llo ", r.consumeAttributeQuoted(true)); + assertEquals('&', r.consume()); + } + } diff --git a/src/test/java/org/jsoup/parser/HtmlTreeBuilderTest.java b/src/test/java/org/jsoup/parser/HtmlTreeBuilderTest.java index dae8e4600f..dcd41b8804 100644 --- a/src/test/java/org/jsoup/parser/HtmlTreeBuilderTest.java +++ b/src/test/java/org/jsoup/parser/HtmlTreeBuilderTest.java @@ -1,14 +1,11 @@ package org.jsoup.parser; +import org.jspecify.annotations.NullMarked; import org.junit.jupiter.api.Test; - -import javax.annotation.Nonnull; -import javax.annotation.ParametersAreNonnullByDefault; import java.io.Reader; import java.lang.annotation.Annotation; import java.lang.reflect.Method; -import java.util.Arrays; import java.util.List; import static org.junit.jupiter.api.Assertions.*; @@ -31,12 +28,10 @@ public void nonnull() { } @Test public void nonnullAssertions() throws NoSuchMethodException { - Method parseMethod = TreeBuilder.class.getDeclaredMethod("parse", Reader.class, String.class, Parser.class); - assertNotNull(parseMethod); - Annotation[] declaredAnnotations = parseMethod.getDeclaredAnnotations(); + Annotation[] declaredAnnotations = 
TreeBuilder.class.getPackage().getDeclaredAnnotations(); boolean seen = false; for (Annotation annotation : declaredAnnotations) { - if (annotation.annotationType().isAssignableFrom(ParametersAreNonnullByDefault.class)) + if (annotation.annotationType().isAssignableFrom(NullMarked.class)) seen = true; } diff --git a/src/test/java/org/jsoup/safety/CleanerTest.java b/src/test/java/org/jsoup/safety/CleanerTest.java index d7c6371cbc..961a7636c2 100644 --- a/src/test/java/org/jsoup/safety/CleanerTest.java +++ b/src/test/java/org/jsoup/safety/CleanerTest.java @@ -9,7 +9,10 @@ import org.jsoup.nodes.Range; import org.jsoup.parser.Parser; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import java.util.Arrays; import java.util.Locale; import static org.junit.jupiter.api.Assertions.*; @@ -388,15 +391,47 @@ public void bailsIfRemovingProtocolThatsNotSet() { } @Test void preservesSourcePositionViaUserData() { - Document orig = Jsoup.parse("\n

Hello

", Parser.htmlParser().setTrackPosition(true)); + Document orig = Jsoup.parse("\n

Hello

", Parser.htmlParser().setTrackPosition(true)); Element p = orig.expectFirst("p"); Range origRange = p.sourceRange(); - assertEquals("2,2:22-2,5:25", origRange.toString()); + assertEquals("2,2:22-2,10:30", origRange.toString()); - Document clean = new Cleaner(Safelist.relaxed()).clean(orig); + Range.AttributeRange attributeRange = p.attributes().sourceRange("id"); + assertEquals("2,5:25-2,7:27=2,8:28-2,9:29", attributeRange.toString()); + + Document clean = new Cleaner(Safelist.relaxed().addAttributes("p", "id")).clean(orig); Element cleanP = clean.expectFirst("p"); + assertEquals("1", cleanP.id()); Range cleanRange = cleanP.sourceRange(); - assertEquals(cleanRange, origRange); - assertEquals(clean.endSourceRange(), orig.endSourceRange()); + assertEquals(origRange, cleanRange); + assertEquals(orig.endSourceRange(), clean.endSourceRange()); + assertEquals(attributeRange, cleanP.attributes().sourceRange("id")); } + + @ParameterizedTest @ValueSource(booleans = {true, false}) + void cleansCaseSensitiveElements(boolean preserveCase) { + // https://github.com/jhy/jsoup/issues/2049 + String html = ""; + String[] tags = {"svg", "feMerge", "feMergeNode", "clipPath"}; + String[] attrs = {"kernelMatrix", "baseFrequency"}; + + if (!preserveCase) { + tags = Arrays.stream(tags).map(String::toLowerCase).toArray(String[]::new); + attrs = Arrays.stream(attrs).map(String::toLowerCase).toArray(String[]::new); + } + + Safelist safelist = Safelist.none().addTags(tags).addAttributes(":all", attrs); + String clean = Jsoup.clean(html, safelist); + String expected = "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + ""; + assertEquals(expected, clean); + } + } diff --git a/src/test/java/org/jsoup/select/ElementsTest.java b/src/test/java/org/jsoup/select/ElementsTest.java index d1895752e8..b5ea4ef358 100644 --- a/src/test/java/org/jsoup/select/ElementsTest.java +++ b/src/test/java/org/jsoup/select/ElementsTest.java @@ -11,9 +11,11 @@ import 
org.jsoup.nodes.TextNode; import org.junit.jupiter.api.Test; +import java.util.Iterator; import java.util.List; import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertFalse; /** Tests for ElementList. @@ -435,4 +437,166 @@ public void tail(Node node, int depth) { assertEquals("http://example.com/bar", absAttrs.get(1)); assertEquals("http://example.com", absAttrs.get(2)); } + + @Test public void setElementByIndex() { + Document doc = Jsoup.parse("

One

Two

Three"); + Element newP = doc.createElement("p").text("New").attr("id", "new"); + + Elements ps = doc.select("p"); + Element two = ps.get(1); + Element old = ps.set(1, newP); + assertSame(old, two); + assertSame(newP, ps.get(1)); // replaced in list + assertEquals("

One

\n

New

\n

Three

", doc.body().html()); // replaced in dom + } + + @Test public void removeElementByIndex() { + Document doc = Jsoup.parse("

One

Two

Three"); + + Elements ps = doc.select("p"); + Element two = ps.get(1); + assertTrue(ps.contains(two)); + Element old = ps.remove(1); + assertSame(old, two); + + assertEquals(2, ps.size()); // removed from list + assertFalse(ps.contains(old)); + assertEquals("

One

\n

Three

", doc.body().html()); // removed from dom + } + + @Test public void removeElementByObject() { + Document doc = Jsoup.parse("

One

Two

Three"); + + Elements ps = doc.select("p"); + Element two = ps.get(1); + assertTrue(ps.contains(two)); + boolean removed = ps.remove(two); + assertTrue(removed); + + assertEquals(2, ps.size()); // removed from list + assertFalse(ps.contains(two)); + assertEquals("

One

\n

Three

", doc.body().html()); // removed from dom + } + + @Test public void removeElementObjectNoops() { + Document doc = Jsoup.parse("

One

Two

Three"); + String origHtml = doc.html(); + Element newP = doc.createElement("p").text("New"); + + Elements ps = doc.select("p"); + int size = ps.size(); + assertFalse(ps.remove(newP)); + assertFalse(ps.remove(newP.childNodes())); + assertEquals(origHtml, doc.html()); + assertEquals(size, ps.size()); + } + + @Test public void clear() { + Document doc = Jsoup.parse("

One

Two

Three
"); + Elements ps = doc.select("p"); + assertEquals(2, ps.size()); + ps.clear(); + assertEquals(0, ps.size()); + + assertEquals(0, doc.select("p").size()); + } + + @Test public void removeAll() { + Document doc = Jsoup.parse("

One

Two

Three

Four

Div"); + Elements ps = doc.select("p"); + assertEquals(4, ps.size()); + Elements midPs = doc.select("p:gt(0):lt(3)"); //Two and Three + assertEquals(2, midPs.size()); + + boolean removed = ps.removeAll(midPs); + assertEquals(2, ps.size()); + assertTrue(removed); + assertEquals(2, midPs.size()); + + Elements divs = doc.select("div"); + assertEquals(1, divs.size()); + assertFalse(ps.removeAll(divs)); + assertEquals(2, ps.size()); + + assertEquals("

One

\n

Four

\n
\n Div\n
", doc.body().html()); + } + + @Test public void retainAll() { + Document doc = Jsoup.parse("

One

Two

Three

Four

Div"); + Elements ps = doc.select("p"); + assertEquals(4, ps.size()); + Elements midPs = doc.select("p:gt(0):lt(3)"); //Two and Three + assertEquals(2, midPs.size()); + + boolean removed = ps.retainAll(midPs); + assertEquals(2, ps.size()); + assertTrue(removed); + assertEquals(2, midPs.size()); + + assertEquals("

Two

\n

Three

\n
\n Div\n
", doc.body().html()); + + Elements psAgain = doc.select("p"); + assertFalse(midPs.retainAll(psAgain)); + + assertEquals("

Two

\n

Three

\n
\n Div\n
", doc.body().html()); + } + + @Test public void iteratorRemovesFromDom() { + Document doc = Jsoup.parse("

One

Two

Three

Four"); + Elements ps = doc.select("p"); + + assertEquals(4, ps.size()); + for (Iterator it = ps.iterator(); it.hasNext(); ) { + Element el = it.next(); + if (el.text().contains("Two")) + it.remove(); + } + assertEquals(3, ps.size()); + assertEquals("

One

\n

Three

\n

Four

", doc.body().html()); + } + + @Test public void removeIf() { + Document doc = Jsoup.parse("

One

Two

Three

Four"); + Elements ps = doc.select("p"); + + assertEquals(4, ps.size()); + boolean removed = ps.removeIf(el -> el.text().contains("Two")); + assertTrue(removed); + assertEquals(3, ps.size()); + assertEquals("

One

\n

Three

\n

Four

", doc.body().html()); + + assertFalse(ps.removeIf(el -> el.text().contains("Five"))); + assertEquals("

One

\n

Three

\n

Four

", doc.body().html()); + } + + @Test public void removeIfSupportsConcurrentRead() { + Document doc = Jsoup.parse("

One

Two

Three

Four"); + Elements ps = doc.select("p"); + assertEquals(4, ps.size()); + + boolean removed = ps.removeIf(el -> ps.contains(el)); + assertTrue(removed); + assertEquals(0, ps.size()); + assertEquals("", doc.body().html()); + } + + @Test public void replaceAll() { + Document doc = Jsoup.parse("

One

Two

Three

Four"); + Elements ps = doc.select("p"); + assertEquals(4, ps.size()); + + ps.replaceAll(el -> { + Element div = doc.createElement("div"); + div.text(el.text()); + return div; + }); + + // Check Elements + for (Element p : ps) { + assertEquals("div", p.tagName()); + } + + // check dom + assertEquals("

One
Two
Three
Four
", TextUtil.normalizeSpaces(doc.body().html())); + } } diff --git a/src/test/java/org/jsoup/select/QueryParserTest.java b/src/test/java/org/jsoup/select/QueryParserTest.java index ae2f344886..51b7c925d2 100644 --- a/src/test/java/org/jsoup/select/QueryParserTest.java +++ b/src/test/java/org/jsoup/select/QueryParserTest.java @@ -18,10 +18,10 @@ public class QueryParserTest { "
  • l2
  • " + "

    yes

    " + ""); - assertEquals("l1 l2 yes", doc.body().select(">p>strong,>*>li>strong").text()); + assertEquals("l1 yes", doc.body().select(">p>strong,>li>strong").text()); // selecting immediate from body + assertEquals("l2 yes", doc.select("body>p>strong,body>*>li>strong").text()); + assertEquals("l2 yes", doc.select("body>*>li>strong,body>p>strong").text()); assertEquals("l2 yes", doc.select("body>p>strong,body>*>li>strong").text()); - assertEquals("yes", doc.select(">body>*>li>strong,>body>p>strong").text()); - assertEquals("l2", doc.select(">body>p>strong,>body>*>li>strong").text()); } @Test public void testImmediateParentRun() { diff --git a/src/test/java/org/jsoup/select/SelectorTest.java b/src/test/java/org/jsoup/select/SelectorTest.java index 3196dc2527..cd2f519a27 100644 --- a/src/test/java/org/jsoup/select/SelectorTest.java +++ b/src/test/java/org/jsoup/select/SelectorTest.java @@ -19,16 +19,31 @@ * @author Jonathan Hedley, jonathan@hedley.net */ public class SelectorTest { + + /** Test that the selected elements match exactly the specified IDs. */ + static void assertSelectedIds(Elements els, String... ids) { + assertNotNull(els); + assertEquals(ids.length, els.size(), "Incorrect number of selected elements"); + for (int i = 0; i < ids.length; i++) { + assertEquals(ids[i], els.get(i).id(), "Incorrect content at index"); + } + } + + static void assertSelectedOwnText(Elements els, String... ownTexts) { + assertNotNull(els); + assertEquals(ownTexts.length, els.size(), "Incorrect number of selected elements"); + for (int i = 0; i < ownTexts.length; i++) { + assertEquals(ownTexts[i], els.get(i).ownText(), "Incorrect content at index"); + } + } + @Test public void testByTag() { - // should be case insensitive + // should be case-insensitive Elements els = Jsoup.parse("

    Hello

    ").select("DIV"); - assertEquals(3, els.size()); - assertEquals("1", els.get(0).id()); - assertEquals("2", els.get(1).id()); - assertEquals("3", els.get(2).id()); + assertSelectedIds(els, "1", "2", "3"); Elements none = Jsoup.parse("

    Hello

    ").select("span"); - assertEquals(0, none.size()); + assertTrue(none.isEmpty()); } @Test public void byEscapedTag() { @@ -44,12 +59,10 @@ public class SelectorTest { @Test public void testById() { Elements els = Jsoup.parse("

    Hello

    Foo two!

    ").select("#foo"); - assertEquals(2, els.size()); - assertEquals("Hello", els.get(0).text()); - assertEquals("Foo two!", els.get(1).text()); + assertSelectedOwnText(els, "Hello", "Foo two!"); Elements none = Jsoup.parse("
    ").select("#foo"); - assertEquals(0, none.size()); + assertTrue(none.isEmpty()); } @Test public void byEscapedId() { @@ -67,22 +80,18 @@ public class SelectorTest { @Test public void testByClass() { Elements els = Jsoup.parse("

    ").select("P.One"); - assertEquals(2, els.size()); - assertEquals("0", els.get(0).id()); - assertEquals("1", els.get(1).id()); + assertSelectedIds(els, "0", "1"); Elements none = Jsoup.parse("

    ").select(".foo"); - assertEquals(0, none.size()); + assertTrue(none.isEmpty()); - Elements els2 = Jsoup.parse("
    ").select(".one-two"); - assertEquals(1, els2.size()); + Elements els2 = Jsoup.parse("
    ").select(".one-two"); + assertSelectedIds(els2, "1"); } @Test public void byEscapedClass() { - Element els = Jsoup.parse("

    One

    "); - - Element one = els.expectFirst("p.one\\.two\\#three"); - assertEquals("One", one.text()); + Document doc = Jsoup.parse("

    One

    "); + assertSelectedOwnText(doc.select("p.one\\.two\\#three"), "One"); } @Test public void testByClassCaseInsensitive() { @@ -91,8 +100,7 @@ public class SelectorTest { Elements elsFromAttr = Jsoup.parse(html).select("p[class=foo]"); assertEquals(elsFromAttr.size(), elsFromClass.size()); - assertEquals(3, elsFromClass.size()); - assertEquals("Two", elsFromClass.get(1).text()); + assertSelectedOwnText(elsFromClass, "One", "Two", "Three"); } @@ -143,43 +151,31 @@ public void testByAttribute(Locale locale) { @Test public void testNamespacedTag() { Document doc = Jsoup.parse("
    Hello
    There"); Elements byTag = doc.select("abc|def"); - assertEquals(2, byTag.size()); - assertEquals("1", byTag.first().id()); - assertEquals("2", byTag.last().id()); + assertSelectedIds(byTag, "1", "2"); Elements byAttr = doc.select(".bold"); - assertEquals(1, byAttr.size()); - assertEquals("2", byAttr.last().id()); + assertSelectedIds(byAttr, "2"); Elements byTagAttr = doc.select("abc|def.bold"); - assertEquals(1, byTagAttr.size()); - assertEquals("2", byTagAttr.last().id()); + assertSelectedIds(byTagAttr, "2"); Elements byContains = doc.select("abc|def:contains(e)"); - assertEquals(2, byContains.size()); - assertEquals("1", byContains.first().id()); - assertEquals("2", byContains.last().id()); + assertSelectedIds(byContains, "1", "2"); } @Test public void testWildcardNamespacedTag() { Document doc = Jsoup.parse("
    Hello
    There"); Elements byTag = doc.select("*|def"); - assertEquals(2, byTag.size()); - assertEquals("1", byTag.first().id()); - assertEquals("2", byTag.last().id()); + assertSelectedIds(byTag, "1", "2"); Elements byAttr = doc.select(".bold"); - assertEquals(1, byAttr.size()); - assertEquals("2", byAttr.last().id()); + assertSelectedIds(byAttr, "2"); Elements byTagAttr = doc.select("*|def.bold"); - assertEquals(1, byTagAttr.size()); - assertEquals("2", byTagAttr.last().id()); + assertSelectedIds(byTagAttr, "2"); Elements byContains = doc.select("*|def:contains(e)"); - assertEquals(2, byContains.size()); - assertEquals("1", byContains.first().id()); - assertEquals("2", byContains.last().id()); + assertSelectedIds(byContains, "1", "2"); } @Test public void testWildcardNamespacedXmlTag() { @@ -189,22 +185,16 @@ public void testByAttribute(Locale locale) { ); Elements byTag = doc.select("*|Def"); - assertEquals(2, byTag.size()); - assertEquals("1", byTag.first().id()); - assertEquals("2", byTag.last().id()); + assertSelectedIds(byTag, "1", "2"); Elements byAttr = doc.select(".bold"); - assertEquals(1, byAttr.size()); - assertEquals("2", byAttr.last().id()); + assertSelectedIds(byAttr, "2"); Elements byTagAttr = doc.select("*|Def.bold"); - assertEquals(1, byTagAttr.size()); - assertEquals("2", byTagAttr.last().id()); + assertSelectedIds(byTagAttr, "2"); Elements byContains = doc.select("*|Def:contains(e)"); - assertEquals(2, byContains.size()); - assertEquals("1", byContains.first().id()); - assertEquals("2", byContains.last().id()); + assertSelectedIds(byContains, "1", "2"); } @Test public void testWildCardNamespacedCaseVariations() { @@ -242,18 +232,13 @@ public void testByAttributeStarting(Locale locale) { @Test public void testByAttributeRegex() { Document doc = Jsoup.parse("

    "); Elements imgs = doc.select("img[src~=(?i)\\.(png|jpe?g)]"); - assertEquals(3, imgs.size()); - assertEquals("1", imgs.get(0).id()); - assertEquals("2", imgs.get(1).id()); - assertEquals("3", imgs.get(2).id()); + assertSelectedIds(imgs, "1", "2", "3"); } @Test public void testByAttributeRegexCharacterClass() { Document doc = Jsoup.parse("

    "); Elements imgs = doc.select("img[src~=[o]]"); - assertEquals(2, imgs.size()); - assertEquals("1", imgs.get(0).id()); - assertEquals("4", imgs.get(1).id()); + assertSelectedIds(imgs, "1", "4"); } @Test public void testByAttributeRegexCombined() { @@ -1172,13 +1157,67 @@ public void wildcardNamespaceMatchesNoNamespace() { Elements empty = doc.select("li:empty"); Elements notEmpty = doc.select("li:not(:empty)"); - assertEquals(3, empty.size()); - assertEquals(2, notEmpty.size()); + assertSelectedIds(empty, "1", "2", "3"); + assertSelectedIds(notEmpty, "4", "5"); + } + + @Test public void parentFromSpecifiedDescender() { + // https://github.com/jhy/jsoup/issues/2018 + String html = "
    • Foo
    • Bar
      • Baz
      • Qux
    "; + Document doc = Jsoup.parse(html); + + Element ul = doc.expectFirst("#outer"); + assertEquals(2, ul.childrenSize()); + + Element li1 = ul.expectFirst("> li:nth-child(1)"); + assertEquals("Foo", li1.ownText()); + assertTrue(li1.select("ul").isEmpty()); + + Element li2 = ul.expectFirst("> li:nth-child(2)"); + assertEquals("Bar", li2.ownText()); + + // And now for the bug - li2 select was not restricted to the li2 context + Elements innerLis = li2.select("ul > li"); + assertSelectedOwnText(innerLis, "Baz", "Qux"); + + // Confirm that parent selector (" ") works same as immediate parent (">"); + Elements innerLisFromParent = li2.select("ul li"); + assertEquals(innerLis, innerLisFromParent); + } + + @Test public void rootImmediateParentSubquery() { + // a combinator at the start of the query is applied to the Root selector. i.e. "> p" matches a P immediately parented + // by the Root (which is for a top level query, or the context element in :has) + // in the sub query, the combinator was dropped incorrectly + String html = "

    A

    B

    C

    \n"; + Document doc = Jsoup.parse(html); + + Elements els = doc.select("p:has(> span, > i)"); // should match a p with an immediate span or i + assertSelectedIds(els, "0", "2"); + } + + @Test public void is() { + String html = "

    "; + Document doc = Jsoup.parse(html); + + assertSelectedIds( + doc.select(":is(section, article) :is(h1, h2, h3)"), + "2", "3"); + + assertSelectedIds( + doc.select(":is(section, article) ~ :is(h1, h2, h3):has(p)"), + "4"); + + assertSelectedIds( + doc.select(":is(h1:has(p), h2:has(section), h3)"), + "1"); + + assertSelectedIds( + doc.select(":is(h1, h2, h3):has(p)"), + "1", "4"); - assertEquals("1", empty.get(0).id()); - assertEquals("2", empty.get(1).id()); - assertEquals("3", empty.get(2).id()); - assertEquals("4", notEmpty.get(0).id()); - assertEquals("5", notEmpty.get(1).id()); + String query = "div :is(h1, h2)"; + Evaluator parse = QueryParser.parse(query); + assertEquals(query, parse.toString()); } } diff --git a/src/test/java/org/jsoup/select/TraversorTest.java b/src/test/java/org/jsoup/select/TraversorTest.java index 11a5167d61..2b1da28137 100644 --- a/src/test/java/org/jsoup/select/TraversorTest.java +++ b/src/test/java/org/jsoup/select/TraversorTest.java @@ -8,8 +8,10 @@ import org.jsoup.nodes.TextNode; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.*; public class TraversorTest { // Note: NodeTraversor.traverse(new NodeVisitor) is tested in @@ -193,4 +195,29 @@ else if (node instanceof TextNode && ((TextNode) node).text().equals("Three")) assertEquals("

    Two

    ", TextUtil.stripNewlines(doc.body().html())); } + + @Test void elementFunctionalTraverse() { + Document doc = Jsoup.parse("

    1

    2

    3"); + Element body = doc.body(); + + AtomicInteger seenCount = new AtomicInteger(); + AtomicInteger deepest = new AtomicInteger(); + body.traverse((node, depth) -> { + seenCount.incrementAndGet(); + if (depth > deepest.get()) deepest.set(depth); + }); + + assertEquals(8, seenCount.get()); // body and contents + assertEquals(3, deepest.get()); + } + + @Test void seesDocRoot() { + Document doc = Jsoup.parse("

    One"); + AtomicBoolean seen = new AtomicBoolean(false); + doc.traverse((node, depth) -> { + if (node.equals(doc)) + seen.set(true); + }); + assertTrue(seen.get()); + } } diff --git a/src/test/resources/htmltests/osi-logo.svg b/src/test/resources/htmltests/osi-logo.svg new file mode 100644 index 0000000000..1aefc1007c --- /dev/null +++ b/src/test/resources/htmltests/osi-logo.svg @@ -0,0 +1,185 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/test/resources/local-cert/README.md b/src/test/resources/local-cert/README.md new file mode 100644 index 0000000000..2d4cbe5e38 --- /dev/null +++ b/src/test/resources/local-cert/README.md @@ -0,0 +1,15 @@ +This directory contains resources for a self-signed TLS certificate, used in jsoup's local integration tests. + +Create the certificate: + +```sh +openssl genrsa 2048 > server.key +chmod 400 server.key +openssl req -new -x509 -config cert.conf -nodes -sha256 -days 36135 -key server.key -out server.crt +``` + +Create the Java key store. 
Used by server, and trusted by client, in `TestServer.java`: +```sh +openssl pkcs12 -export -in server.crt -inkey server.key -out server.p12 -name jsoup -passout pass:hunter2 +keytool -importkeystore -srckeystore server.p12 -srcstoretype PKCS12 -destkeystore server.pfx -deststoretype PKCS12 -srcstorepass hunter2 -deststorepass hunter2 +``` diff --git a/src/test/resources/local-cert/cert.conf b/src/test/resources/local-cert/cert.conf new file mode 100644 index 0000000000..c9019e4828 --- /dev/null +++ b/src/test/resources/local-cert/cert.conf @@ -0,0 +1,13 @@ +[ req ] +distinguished_name = subject +x509_extensions = x509_ext +prompt = no + +[ subject ] +commonName = jsoup test server + +[ x509_ext ] +subjectAltName = @alternate_names + +[ alternate_names ] +DNS.1 = localhost diff --git a/src/test/resources/local-cert/server.crt b/src/test/resources/local-cert/server.crt new file mode 100644 index 0000000000..27d549edab --- /dev/null +++ b/src/test/resources/local-cert/server.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIC/zCCAeegAwIBAgIUKEHmb0P5j+5mNjNk/PTdW6t9UTcwDQYJKoZIhvcNAQEL +BQAwHDEaMBgGA1UEAwwRanNvdXAgdGVzdCBzZXJ2ZXIwIBcNMjMxMTAyMjM0OTE1 +WhgPMjEyMjEwMDkyMzQ5MTVaMBwxGjAYBgNVBAMMEWpzb3VwIHRlc3Qgc2VydmVy +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAvkvtYwy7jnSPYM59EVsR +SjTO8WsXHVs/UJ+Ns+7RiTeb2hUOd4lh38TOh9Yri/7WI5Ejif64FL6b1KEWRe9+ +60QKIOB0+7DUpnXomisD6TytwV8R8BSEZ4vLbMUVizr95Ze+w6SzMPshSvHBMIbU +RimtmY1jBglHytETRBjO1etG120R1M45GJfxV8rIDOgM6FksOnWLQeKzeGKBf0vs +5MlTz/GDs/YpXydg779QOmJAQWj78EMdetwmUPwnpC0kaO3dnlD+mzDrfeSkorrp +5UKij1k4s2tG+E/VIskGyuc/MSU6dc8/ECzuK7c/UjpUz9ohSfLwhSGdjnx0qjXm +kwIDAQABozcwNTAUBgNVHREEDTALgglsb2NhbGhvc3QwHQYDVR0OBBYEFAWRk6Jd +PJrlw3uJKEG7JLku9SwsMA0GCSqGSIb3DQEBCwUAA4IBAQAxEXk5d0ACzaxtOF9+ +/XF3Zt8X/eXxyoQUaG2PyfJkN1rnO7zyx/oPIIAckaZev0eFVwOk3M5K4xxYar/Y +DqdioKwH8qAy4kk7sdCnTU8jlkUMcFqYCt7rLcDviugjg0VO6bYLrq++oeOuDybs +M7J3CgzPAppSpRoTgss3bGzHt87rWJ2XcHxbE8Gg2GtoZnFpcSHkx40EdlDWN8dm 
+/mZlMxjVFdktz9dpqtR4Q4cAbHETomJOHC2AnhEi3PjuYhGHMbIRgtIg0XX4H/0u +eHVvkb9xJ3SmmdidYTDlOFzLon8NqSZmmt6EDpDio62bDem49jUtnYmxJKXAxhL0 +jnwQ +-----END CERTIFICATE----- diff --git a/src/test/resources/local-cert/server.key b/src/test/resources/local-cert/server.key new file mode 100644 index 0000000000..70c498c286 --- /dev/null +++ b/src/test/resources/local-cert/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC+S+1jDLuOdI9g +zn0RWxFKNM7xaxcdWz9Qn42z7tGJN5vaFQ53iWHfxM6H1iuL/tYjkSOJ/rgUvpvU +oRZF737rRAog4HT7sNSmdeiaKwPpPK3BXxHwFIRni8tsxRWLOv3ll77DpLMw+yFK +8cEwhtRGKa2ZjWMGCUfK0RNEGM7V60bXbRHUzjkYl/FXysgM6AzoWSw6dYtB4rN4 +YoF/S+zkyVPP8YOz9ilfJ2Dvv1A6YkBBaPvwQx163CZQ/CekLSRo7d2eUP6bMOt9 +5KSiuunlQqKPWTiza0b4T9UiyQbK5z8xJTp1zz8QLO4rtz9SOlTP2iFJ8vCFIZ2O +fHSqNeaTAgMBAAECggEACY0zFaEqetyD49aJdYkOJZzf9EMtTlZpp6jSioEGuG33 +nysmZj6ZkItG2I+Z8PVyFyfuUjtcTwJAPRx2yzzZsIJiRcMubAG0ssRBUBevoxHe +INIeSuAkwzPDmqqLycjEvLTwqM5IBkHcqm/XBBIIbpsh8Q6lNUTa+yWiY20hWKBX +7I+mNg9qTsGkYCthZVBgkpmg3DCCX4l8hraHhev3KgdpaILaDSVqjd1IBwJ9ynJc +mJ0/pvIVO7dwxJ7t7b+vNp8iJQjPlOZmz6hWKyFMhxnkOcri3OBYcr1JMkVZ38RD +OjKhaaCnhhSH+IxwLxQQAs//S+EN3l6kOngN5cZ/aQKBgQDiEqp7kT7nAPRMq9Af +okomKnQIpAuEfOauzH02PGkVYawCulWmr+FqdUZxz5SgPEp55IyTfD6iPaSb6QcO +QuH3PvtZyVQv1ZrExquvd/3lS/cQwaDzV4YG46fBbw9K72BHkVV7dkxm+0p4Imid +2XLRqT86difx1etovb7fzMXsCwKBgQDXfNh5Gk250Upyh3+7FDYr7bOvc0l9y/Xn +eODM/yRI3MLaGTUXu90MK50AsOqxedvs4x5NvqG/n2Cr536b9C0tr09CfHeGsOMG +OEfzxMrRv78ItBF7vLELYz1szi6JEZCeK1whgJ1osrTGWAhWkMTIErh3UOfZGgYG +qFQGRFP8mQKBgG7FlqNVV+z4mru2tBPMAWkSBCj3uG0ChkXADNo2X4cKhK4Rf0Zd +h6YSMKIzhC+/Wv6+7eKWTlpQugdq9voV64KqaZ5k98s4bs1cS2N+9/kSb8zWE3co +u5NEmT4+nM+q2xI2NBx6qpULLEIRGhG+KnRw6XpLyubEWsTHtG8UdyZhAoGAVdm5 +bNYb7VICtQpiyyfMRUgYdGgb+XBO8f9ooINt81Fwl++/BUulT3n4vRO/DSIdio0Z +v6OZUXyvyQ0blgp8DV1w2G46OIE0kX/OusHGhDY+Z7tF0+RjLMRG7pheVeGXmkxw +EjDphZLdDsB34fUfUQ6US4UCOa5yhCiAAVcrltECgYEAlYNAELPKAcmWd+4G8Fr6 
+07dIgJHZ7W45eZwwUwva9t09J/9d4wq7X4GaX98Jejdeh4nTHnBWX49m6EgQ0ccH +4jcIvTj61aBuDNiW8p85O5gpBrCneFowFHsPElhG2nFSFhGtIST8fkiy5sBwxMFM +1nauFIaX8tP0NxQDw+PvdDc= +-----END PRIVATE KEY----- diff --git a/src/test/resources/local-cert/server.p12 b/src/test/resources/local-cert/server.p12 new file mode 100644 index 0000000000..e6804a0f46 Binary files /dev/null and b/src/test/resources/local-cert/server.p12 differ diff --git a/src/test/resources/local-cert/server.pfx b/src/test/resources/local-cert/server.pfx new file mode 100644 index 0000000000..746aef2e35 Binary files /dev/null and b/src/test/resources/local-cert/server.pfx differ