如何修复HtmlUnit无法加载网站全部URL(包括由JavaScript发起的请求)的问题?
使用HtmlUnit的WebClient获取请求时,缺少某些URL。
问题清单:
某些URL没有被加载,例如facebook.com的JSON类型请求,以及Google Analytics的JavaScript、gif和XHR类型请求。
例如,在Google Analytics中,5个URL中只加载了4个,还有1个URL未被触发。请检查我们的代码,并告诉我们如何让conversion.async.js中的请求也被触发。
这是我的代码:
// Create an HtmlUnit client emulating Firefox 52 and start from a clean state
// (no cookies, empty cache) so every resource is actually requested again.
WebClient webClient = new WebClient(BrowserVersion.FIREFOX_52);
webClient.getCookieManager().clearCookies();
webClient.getCache().clear();
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
// Suppress CSS parser warnings; they are irrelevant for collecting URLs.
webClient.setCssErrorHandler(new SilentCssErrorHandler());
webClient.getOptions().setTimeout(120000);
// NOTE: webClient.waitForBackgroundJavaScript(60000) used to be called here.
// It is not a configuration setting: it blocks until the background JavaScript
// jobs of pages that are ALREADY loaded have finished. Before getPage(...) there
// is nothing to wait for, so the call had no effect and was removed. Invoke it
// after the page has been loaded instead.
webClient.getOptions().setRedirectEnabled(true);
webClient.getOptions().setJavaScriptEnabled(true);
// Do not fail (or dump content) on HTTP error codes or script errors; third-party
// scripts frequently trigger both and the crawl should continue regardless.
webClient.getOptions().setPrintContentOnFailingStatusCode(false);
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
webClient.getOptions().setThrowExceptionOnScriptError(false);
// NOTE(review): this disables TLS certificate validation. Acceptable for a
// crawler/diagnostic tool, but do not use it where the responses are trusted.
webClient.getOptions().setUseInsecureSSL(true);
webClient.getOptions().setDoNotTrackEnabled(false);
// Wrap the client's connection so that every request can be observed.
// NOTE(review): per the HtmlUnit Javadoc, the WebConnectionWrapper(WebClient)
// constructor installs the wrapper as the client's connection, which is why the
// instance is not assigned to a variable — confirm for the HtmlUnit version in use.
new WebConnectionWrapper(webClient) {
    /**
     * Logs the requested URL and the resulting HTTP status code, and records the
     * URL in {@code resourceUrls} when the status code is below 400.
     *
     * @param request the request about to be sent
     * @return the unmodified response from the wrapped connection
     * @throws IOException if the wrapped connection fails
     */
    @Override
    public WebResponse getResponse(WebRequest request) throws IOException {
        System.out.println(request.getUrl());
        final WebResponse response = super.getResponse(request);
        final int statusCode = response.getStatusCode();
        System.out.println(statusCode);
        if (statusCode < 400) {
            resourceUrls.add(request.getUrl());
        }
        return response;
    }
};
// getPage(String) needs an absolute URL including the scheme; a bare host name
// such as "abc.com" cannot be parsed as a URL.
String url = "http://abc.com";
HtmlPage page = webClient.getPage(url);
// Give scripts started by the page (analytics beacons, XHR calls, timers) a chance
// to run and issue their requests. Waits until the background JavaScript jobs have
// finished or 60 s have elapsed, whichever comes first.
webClient.waitForBackgroundJavaScript(60000);
// Force-load the first <link> resource, if the page has one.
HtmlLink link = page.getFirstByXPath("//link");
if (link != null) {
    link.getWebResponse(true);
}
// Force-load the first <img>, if the page has one.
try {
    HtmlImage image = page.getFirstByXPath("//img");
    if (image != null) {
        image.getImageReader();
    }
} catch (IOException e) {
    // Best effort: a broken or unreadable image must not abort the crawl,
    // but the failure is reported instead of being silently dropped.
    System.err.println("Could not load image from " + url + ": " + e);
}
共 (0) 个答案