大家都知道 HttpClient 可以抓取页面数据,但有的页面需要用户登录后才能访问。最初我先用浏览器登录,再把浏览器的 Cookie 复制到代码里,这样可以抓取;但大约一天后服务器的 session 就过期了,每次都要重新复制,很麻烦。后来在网上查了很多资料,才有了下面的版本。使用时需要设置两个 URL:一个是登录页的 URL,用来提交登录表单并获取登录后的 Cookie;另一个是登录后要抓取的页面 URL,用同一个 HttpClient 实例发起第二次请求即可。代码很简单,大家应该都能看懂,我就不多解释了。
package cn.amazon.http;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Map.Entry;import org.apache.http.HttpEntity;import org.apache.http.HttPResponse;import org.apache.http.NameValuePair;import org.apache.http.client.CookieStore;import org.apache.http.client.entity.UrlEncodedFormEntity;import org.apache.http.client.methods.HttpPost;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.impl.conn.PoolingClientConnectionManager;import org.apache.http.message.BasicNameValuePair;import org.apache.http.util.EntityUtils;//对接口进行测试 public class getCookie { private String loginUrl = ""; private String SearchUrl = ""; private String charset = "UTF-8"; public void test() { //存放发送参数 Map<String, String> createMap = new HashMap<String, String>(); createMap.put("userName", ""); createMap.put("passWord", ""); createMap.put("email", "huayanh@sellercs.amazon.com"); HttpPost httpPost = null; HttpPost httpPost2 = null; HttpResponse response = null; DefaultHttpClient client = null; String result = null; try { client = new DefaultHttpClient(new PoolingClientConnectionManager()); httpPost = new HttpPost(loginUrl); // 设置请求头 httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0"); // 设置参数 List<NameValuePair> list = new ArrayList<NameValuePair>(); Iterator iterator = createMap.entrySet().iterator(); while (iterator.hasNext()) { Entry<String, String> elem = (Entry<String, String>) iterator.next(); list.add(new BasicNameValuePair(elem.getKey(), elem.getValue())); } if (list.size() > 0) { UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list, charset); httpPost.setEntity(entity); } // 第一次请求 response = client.execute(httpPost); System.out.println(response); // 第二次请求 httpPost2 = new HttpPost(SearchUrl); response = client.execute(httpPost2); System.out.println(response); // 登录后的请求内容 if (response != null) { HttpEntity resEntity = 
response.getEntity(); if (resEntity != null) { result = EntityUtils.toString(resEntity, charset); } } // System.out.println(result); } catch (Exception ex) { ex.printStackTrace(); } } public static void main(String[] args) { getCookie main = new getCookie(); main.test(); }}新闻热点
疑难解答