Html.java
/**
* Copyright (C) 2017 HttpBuilder-NG Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package groovyx.net.http.optional;
import groovy.util.XmlSlurper;
import groovyx.net.http.ChainedHttpConfig;
import groovyx.net.http.FromServer;
import groovyx.net.http.NativeHandlers;
import groovyx.net.http.ToServer;
import groovyx.net.http.TransportingException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Supplier;
import static groovyx.net.http.NativeHandlers.Encoders.handleRawUpload;
import static groovyx.net.http.NativeHandlers.Encoders.stringToStream;
/**
 * Parser and encoder methods for handling HTML content. Response parsing is available through either the
 * https://jsoup.org/[JSoup] HTML library or the NekoHTML SAX parser; request encoding is provided for JSoup
 * {@link Document} bodies.
 */
public class Html {

    /**
     * Supplier of the NekoHTML-based response parser, {@link #neckoParse(ChainedHttpConfig, FromServer)}.
     */
    public static final Supplier<BiFunction<ChainedHttpConfig, FromServer, Object>> neckoParserSupplier = () -> Html::neckoParse;

    /**
     * Supplier of the JSoup-based request encoder, {@link #jsoupEncode(ChainedHttpConfig, ToServer)}.
     */
    public static final Supplier<BiConsumer<ChainedHttpConfig, ToServer>> jsoupEncoderSupplier = () -> Html::jsoupEncode;

    /**
     * Supplier of the JSoup-based response parser, {@link #jsoupParse(ChainedHttpConfig, FromServer)}.
     */
    public static final Supplier<BiFunction<ChainedHttpConfig, FromServer, Object>> jsoupParserSupplier = () -> Html::jsoupParse;

    /**
     * Method that provides an HTML parser for response configuration (uses the NekoHTML SAX parser).
     *
     * The response stream is decoded with the charset reported by the response and fed through an
     * {@link XmlSlurper} backed by the NekoHTML parser.
     *
     * @param config the chained configuration (not used by this parser)
     * @param fromServer the server response adapter
     * @return the parsed HTML content (a {@link groovy.util.slurpersupport.GPathResult} object)
     * @throws TransportingException wrapping any {@link IOException} or {@link SAXException} raised while parsing
     */
    public static Object neckoParse(final ChainedHttpConfig config, final FromServer fromServer) {
        try {
            final XMLReader p = new org.cyberneko.html.parsers.SAXParser();
            // Entity lookups are routed through the library's shared catalog resolver.
            p.setEntityResolver(NativeHandlers.Parsers.catalogResolver);
            return new XmlSlurper(p).parse(new InputStreamReader(fromServer.getInputStream(), fromServer.getCharset()));
        } catch (IOException | SAXException ex) {
            throw new TransportingException(ex);
        }
    }

    /**
     * Method that provides an HTML parser for response configuration (uses JSoup).
     *
     * The response URI is passed to JSoup as the base URI of the resulting document.
     *
     * @param config the chained configuration (not used by this parser)
     * @param fromServer the server response adapter
     * @return the parsed HTML content (a {@link Document} object)
     * @throws TransportingException wrapping any {@link IOException} raised while reading the response
     */
    public static Object jsoupParse(final ChainedHttpConfig config, final FromServer fromServer) {
        try {
            return Jsoup.parse(fromServer.getInputStream(), fromServer.getCharset().name(), fromServer.getUri().toString());
        } catch (IOException e) {
            throw new TransportingException(e);
        }
    }

    /**
     * Method that provides an HTML encoder for request configuration (uses JSoup).
     *
     * If the raw-upload helper reports that it has already handled the body, nothing further is done. Otherwise
     * the body must be a JSoup {@link Document}; its text is written to the server using the request charset.
     *
     * @param config the chained configuration, from which the request body and charset are read
     * @param ts the server request adapter that receives the encoded content
     * @throws NullPointerException if the request body is null
     * @throws IllegalArgumentException if the request body is not a {@link Document}
     */
    public static void jsoupEncode(final ChainedHttpConfig config, final ToServer ts) {
        final ChainedHttpConfig.ChainedRequest request = config.getChainedRequest();

        if (handleRawUpload(config, ts)) {
            return;
        }

        // Validate explicitly so callers get a descriptive error instead of a bare NPE / ClassCastException.
        final Object body = request.actualBody();
        if (body == null) {
            throw new NullPointerException("Effective body cannot be null");
        }
        if (!(body instanceof Document)) {
            throw new IllegalArgumentException(String.format(
                "Cannot encode bodies of type %s, only bodies of: %s",
                body.getClass().getName(), Document.class.getName()));
        }

        final Document document = (Document) body;
        // NOTE(review): text() yields the document's text content, not its HTML markup - confirm this is the
        // intended payload (outerHtml() would send the markup instead).
        ts.toServer(stringToStream(document.text(), request.actualCharset()));
    }
}