关键词

网络爬虫之HTTPClient

HTTPClient官网:http://hc.apache.org/httpcomponents-client-4.5.x/quickstart.html

问题一:明明浏览器请求有数据,可使用HTTPClient输出却为空

	import java.io.IOException;

	import org.apache.http.*;
	import org.apache.http.client.*;
	import org.apache.http.client.methods.HttpGet;
	import org.apache.http.impl.client.CloseableHttpClient;
	import org.apache.http.impl.client.HttpClients;
	import org.apache.http.util.EntityUtils;
	import org.junit.Test;

	/**
	 * Requests {@code https://www.80s.tw} with a default {@link CloseableHttpClient} and prints
	 * the response body between two separator lines.
	 *
	 * <p>A response whose status is outside 2xx is reported as a
	 * {@link ClientProtocolException}. Every I/O failure (including a failed TLS handshake)
	 * is printed to stderr so that a {@code null} body can be traced back to its cause
	 * instead of appearing as silently empty output.
	 */
	@Test
	public void httpClientTest1() {
	    CloseableHttpClient httpclient = HttpClients.createDefault();
	    try {
	        String url = "https://www.80s.tw";
	        HttpGet httpGet = new HttpGet(url);
	        System.out.println("executing request " + httpGet.getURI());

	        ResponseHandler<String> responseHandler = new ResponseHandler<String>() {
	            @Override
	            public String handleResponse(final HttpResponse response)
	                    throws ClientProtocolException, IOException {
	                int status = response.getStatusLine().getStatusCode();
	                if (status < 200 || status >= 300) {
	                    throw new ClientProtocolException("Unexpected response status: " + status);
	                }
	                HttpEntity entity = response.getEntity();
	                // "UTF-8" is only the fallback: a charset declared in the response's
	                // Content-Type still takes precedence inside EntityUtils.
	                return entity != null ? EntityUtils.toString(entity, "UTF-8") : null;
	            }
	        };

	        String responseBody = null;
	        try {
	            responseBody = httpclient.execute(httpGet, responseHandler);
	        } catch (IOException e) {
	            // Also covers ClientProtocolException (an IOException subtype). Never swallow
	            // this: an SSL handshake error ends up here, and hiding it is what makes the
	            // output look inexplicably empty.
	            e.printStackTrace();
	        }
	        System.out.println("-------------------------------------------");
	        System.out.println(responseBody);
	        System.out.println("-------------------------------------------");
	    } finally {
	        try {
	            httpclient.close();
	        } catch (IOException e) {
	            e.printStackTrace();
	        }
	    }
	}

  原因1:该网站的 HTTPS 证书可能不在 JVM 默认的信任库中,导致 SSL 握手失败;如果此时抛出的异常没有被打印出来,看到的就只是空输出

  证书解决办法:http://www.cnblogs.com/zhumengke/p/8846912.html

再次请求时,加载已导入所下载证书的信任库(jssecacerts):

import javax.net.ssl.SSLContext;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;
import org.junit.Test;
	/**
	 * Requests {@code https://www.80s.tw} through an HttpClient whose SSL context trusts the
	 * certificates stored in {@code D:/java/jre/lib/security/jssecacerts} (password
	 * {@code changeit}), then prints the status line and the response body.
	 *
	 * <p>NOTE(review): this snippet uses {@code java.io.File} and {@code java.io.IOException};
	 * both must be imported in addition to the imports listed with it.
	 *
	 * @throws IllegalStateException if the trust store cannot be loaded; continuing with a
	 *         missing SSL context would only fail later with a less helpful error
	 */
	@Test
	public void httpTest() {
		SSLContext sslcontext;
		try {
			File file = new File("D:/java/jre/lib/security", "jssecacerts");
			sslcontext = SSLContexts.custom()
					.loadTrustMaterial(file, "changeit".toCharArray(), new TrustSelfSignedStrategy()).build();
		} catch (Exception e) {
			throw new IllegalStateException("Unable to build SSL context from jssecacerts trust store", e);
		}
		// TLSv1 is disabled by default in current JDKs and refused by many servers, so the
		// handshake is restricted to TLSv1.2 instead.
		SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslcontext, new String[] { "TLSv1.2" }, null,
				SSLConnectionSocketFactory.getDefaultHostnameVerifier());
		CloseableHttpClient httpclient = HttpClients.custom().setSSLSocketFactory(sslsf).build();
		try {
			HttpGet httpget = new HttpGet("https://www.80s.tw");
			System.out.println("Executing request " + httpget.getRequestLine());
			CloseableHttpResponse response = httpclient.execute(httpget);
			try {
				HttpEntity entity = response.getEntity();
				System.out.println("----------------------------------------");
				System.out.println(response.getStatusLine());
				if (entity != null) {
					// "UTF-8" is only the fallback used when the response declares no charset.
					System.out.println(EntityUtils.toString(entity, "UTF-8"));
				}
			} finally {
				response.close();
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				httpclient.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

  

 

本文链接:http://task.lmcjl.com/news/6778.html

展开阅读全文