>

Jsoup解析获取HTML DOM,获取http协议请求失败

- 编辑:www.bifa688.com -

Jsoup解析获取HTML DOM,获取http协议请求失败

Jsoup问题---获取http协议请求失败 org.jsoup.UnsupportedMimeTypeException: Unhandled content type. Must be text/*, application/xml, or application/xhtml+xml.,jsoupxhtml

package com.open1111.jsoup;

Jsoup查找dom元素,jsoup查找dom

package com.open1111.jsoup;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class Demo02 {//

public static void main(String[] args) throws Exception{
CloseableHttpClient httpclient = HttpClients.createDefault(); // 创建httpclient实例
HttpGet httpget = new HttpGet(""); // 创建httpget实例

CloseableHttpResponse response = httpclient.execute(httpget); // 执行get请求
HttpEntity entity=response.getEntity(); // 获取重回实体
String content=EntityUtils.toString(entity, "utf-8");//设置content编码
response.close(); // 关闭流和自由系统能源

Document doc=Jsoup.parse(content); // 分析网页 获得文书档案对象
Elements elements=doc.getElementsByTag("title"); // 获取tag是title的所有DOM元素
Element element=elements.get(0); // 获取第1个元素
String title=element.text(); // 重回成分的文本
System.out.println("网页标题是:" title);

Element element2=doc.getElementById("site_nav_top"); // 获取id=site_nav_top的DOM元素
String navTop=element2.text(); // 重回成分的文本
System.out.println("口号:" navTop);

Elements itemElements=doc.getElementsByClass("post_item"); // 依照样式名称来询问DOM
System.out.println("=======输出post_item==============");
for(Element e:itemElements){//for加强循环
System.out.println(e.html());
System.out.println("-------------");
}

Elements widthElements=doc.getElementsByAttribute("width"); // 依照属性名称来询问DOM
System.out.println("=======输出with的DOM==============");
for(Element e:widthElements){
System.out.println(e.toString());
System.out.println("-------------");
}

Elements targetElements=doc.getElementsByAttributeValue("target", "_blank");//依据属性名和属性值来得到dom成分
System.out.println("=======输出target-_blank的DOM==============");
for(Element e:targetElements){
System.out.println(e.toString());
System.out.println("-------------");
}

}
}

package com.open1111.jsoup; import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache....

Jsoup问题---获取http协议请求失败

1、问题:用Jsoup在获取一些网站的数据时,起初获取很顺利,但是在访问某浪的数据时Jsoup报错,应该是响应头里面的内容类型(ContentType)不符合要求。

错误信息:

Exception in thread "main" org.jsoup.UnsupportedMimeTypeException: Unhandled content type. Must be text/*, application/xml, or application/xhtml+xml. Mimetype=application/json; charset=utf-8, URL=...
 at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:547)
 at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:493)
 at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:205)
 at org.jsoup.helper.HttpConnection.get(HttpConnection.java:194)
 at com.Interface.test.JsoupUtil.httpGet(JsoupUtil.java:30)
 at com.Interface.test.test.main(test.java:23)

请求方法:

/**
 * Fetches {@code url} with jsoup and returns the parsed document as a string.
 *
 * <p>This version does not call {@code ignoreContentType(true)}, so {@code con.get()} throws
 * {@code org.jsoup.UnsupportedMimeTypeException} when the response is not text/*,
 * application/xml or application/xhtml+xml (for example application/json).
 *
 * @param url    address to request
 * @param cookie value for the Cookie request header; the header is omitted when {@code null}
 * @return the parsed document rendered via {@code Document.toString()}
 * @throws IOException if the request fails or the response content type is rejected
 */
public static String httpGet(String url, String cookie) throws IOException {
    // Obtain the request connection
    Connection con = Jsoup.connect(url);
    // Request headers, the cookie in particular
    con.header("Accept", "text/html, application/xhtml+xml, */*");
    con.header("Content-Type", "application/x-www-form-urlencoded");
    con.header("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0))");
    if (cookie != null) {
        con.header("Cookie", cookie);
    }
    // Execute the GET and parse the result
    Document doc = con.get();
    // Print the page title
    System.out.println(doc.title());
    return doc.toString();
}

2、解决:只需要在 Connection con = Jsoup.connect(url);中添加ignoreContentType(true)即可,这里的ignoreContentType(true)意思就是忽略ContentType的检查。

添加后

        // Obtain the request connection; ignoreContentType(true) skips the response Content-Type check
        Connection con = Jsoup.connect(url).ignoreContentType(true);

 

org.jsoup.UnsupportedMimeTypeException: Unhandled content type. Must be text/*, application/xml, or application/xhtml+xml.,jsoupxhtml J...

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class Demo01 {//jsopdemo

public static void main(String[] args) throws Exception{
CloseableHttpClient httpclient = HttpClients.createDefault(); // 创建httpClient实例
HttpGet httpget = new HttpGet(""); // 创建httpGet实例

CloseableHttpResponse response = httpclient.execute(httpget); // 创造httpresponse实例并用来就收httpClient实例实践get供给重回值
HttpEntity entity=response.getEntity(); // 从response中获得实体类
String content=EntityUtils.toString(entity, "utf-8");//entity转换来字符串
response.close(); // 关闭流和自由系统财富

Document doc=Jsoup.parse(content); // 分析网页 获得文书档案对象
Elements elements=doc.getElementsByTag("title"); // 获取tag是title的所有DOM元素
Element element=elements.get(0); // 获取第1个元素
String title=element.text(); // 再次来到成分的文本
System.out.println("网页标题是:" title);

Element element2=doc.getElementById("site_nav_top"); // 获取id=site_nav_top的DOM元素
String navTop=element2.text(); // 再次回到成分的公文
System.out.println("口号:" navTop);

}
}

本文由必发88手机版发布,转载请注明来源:jsop深入分析得到htmldome,获取http协议央求战败必