import java.io.BufferedInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.ProxyConfig;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
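/**
 * This demo mainly tests the stability of crawler (dynamic) proxy IPs.
 * It supports sites such as Tianyancha (company information), eBay, Amazon, Sina Weibo, court documents and classified-ad sites.
 * It can also serve as a reference crawler project; to use it, adapt the webParseHtml method yourself.
 */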
public class TestDynamicIpContinue {
// NOTE(review): the declaration below is split across two lines and its element type
// appears to have been lost in extraction (presumably List<String>) — confirm against the original.
public static List
ipList = new ArrayList<>();
// Shared stop flag; GetIP.run() keeps looping while this is false.
public static boolean gameOver = false;
// Entry point: sets up the test parameters declared below.
public static void main(String[] args) throws Exception {
// Interval, in seconds, between IP fetches
long fetchIpSeconds = 5;
int testTime = 3;
// Fill in your data5u proxy order number; IPs can only be fetched once it is set
String order = "88888888888888888888888888888";
// Target URL to crawl. Examples:
// Company info (Tianyancha) http://www.tianyancha.com/company/1184508115
// Company info (national enterprise registry) http://www.gsxt.gov.cn/%7BLtkX_Us_Uuw_QRrZ9mfv2cbf8ANpkJNT8_EzigHHLIvfwbsXfxY0o15JwumCNmvtm_nv9Wtm2Iy_ptgrdpD7p-dP6C8an4IYel_Bx4EnhQhxk8Q4jptLj9IMw9N0lCP-4i0Q4MN55e0wtKOgDy4GEw-1493711400352%7D
// E-commerce (eBay) http://www.ebay.com/sch/tenco-tech/m.html?_ipg=200&_sop=12&_rdc=1
// E-commerce (Tmall) https://list.tmall.com/search_product.htm?cat=56594003&brand=97814105&sort=s&style=g&search_condition=23&from=sn_1_cat&industryCatId=50025174#J_crumbs
// E-commerce (JD) https://search.jd.com/Search?keyword=%E8%8B%8F%E6%89%93%E7%B2%89&enc=utf-8&suggest=1.def.0.T15&wq=s%27d%27f&pvid=1d962d789b81461aa6cce40b26a90429
// IP check http://ip.chinaz.com/getip.aspx
// Anonymity check http://www.xxorg.com/tools/checkproxy/
// Sina Weibo https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D3%26q%3D%E6%B1%BD%E8%BD%A6&queryVal=%E6%B1%BD%E8%BD%A6&type=user&page=2
// Court documents https://m.itslaw.com/mobile
// Classified ads (Baixing) http://china.baixing.com/cheliang/
String targetUrl = "http://pv.sohu.com/cityjson?ie=utf-8";
// Referer header value; needs to be set when crawling Taobao or Tmall
String referer = "";
// Enable HTTPS support
boolean https = true;
// Whether to print response header information
boolean outputHeaderInfo = false;
// Whether to load JavaScript; loading JS makes requests slower
boolean useJS = false;
// Request timeout in milliseconds (the original note says the default is 5 seconds; the value assigned here is 10000)
int timeOut = 10000;
// Refuse to run without an order number.
if (order == null || "".equals(order)) {
System.err.println("请输入爬虫(动态)代理订单号");
return;
// NOTE(review): the next line looks like several statements fused together by text
// extraction (text between '<' and '>' seems to be lost). The remainder of main and the
// beginning of the method that the following lines belong to are not visible here.
System.out.println(">>>>>>>>>>>>>>动态IP测试开始<<<<<<<<<<<<<>>>>>>>>>>>>>动态IP测试结束<<<<<<<<<<<<< headers = response.getResponseHeaders();
// Print every response header as "name-->value".
for (NameValuePair nameValuePair : headers) {
System.out.println(nameValuePair.getName() + "-->" + nameValuePair.getValue());
// Pick the body text: raw content for application/json responses, serialized XML for HTML pages.
boolean isJson = false ;
if (response.getContentType().equals("application/json")) {
html = response.getContentAsString();
isJson = true ;
}else if(page.isHtmlPage()){
html = ((HtmlPage)page).asXml();
long endMs = System.currentTimeMillis();
// Choose what to print based on the requested URL and on whether the body was JSON.
if (url.indexOf("2017.ip138.com") != -1) {
System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :" + Jsoup.parse(html).select("center").text());
}else if(url.equals("http://www.xxorg.com/tools/checkproxy/")) {
System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :" + Jsoup.parse(html).select("#result .jiacu").text());
}else if(isJson) {
System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :" +html);
}else if(url.indexOf("tianyancha.com") != -1) {
// Tianyancha pages: print three groups of elements, each picked by a CSS selector.
Document doc = Jsoup.parse(html);
Elements els = doc.select(".c8");
System.out.println(getName() + "企业基本信息:");
for (Element element : els) {
System.out.println("\t*" + element.text());
els = doc.select(".companyInfo-table tr");
System.out.println(getName() + "企业股东信息:");
for (Element element : els) {
System.out.println("\t*" + element.text());
els = doc.select("#_container_check tr");
System.out.println(getName() + "企业抽查息:");
for (Element element : els) {
System.out.println("\t*" + element.text());
}else{
// Any other page: print only the text of its <title> element.
Document doc = Jsoup.parse(html);
System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :" + doc.select("title").text());
// On failure report the proxy address with the error message; the client is closed either way.
} catch (Exception e) {
System.err.println(ipport + ":" + e.getMessage());
} finally {
client.close();
return html;
// 定时获取动态IP
public class GetIP implements Runnable{
long sleepMs = 1000;
int maxTime = 3;
String order = "";
String targetUrl;
boolean useJs;
int timeOut;
String referer;
boolean https;
boolean outputHeaderInfo;
public GetIP(long sleepMs, int maxTime, String order, String targetUrl, boolean useJs, int timeOut, String referer, boolean https, boolean outputHeaderInfo) {
this.sleepMs = sleepMs;
this.maxTime = maxTime;
this.order = order;
this.targetUrl = targetUrl;
this.useJs = useJs;
this.timeOut = timeOut;
this.referer=referer;
this.https=https;
this.outputHeaderInfo=outputHeaderInfo;
@Override
public void run() {
int time = 1;
while(!gameOver){
if(time >= 4){
gameOver = true;
break;
try {
java.net.URL url = new java.net.URL("http://api.ip.data5u.com/dynamic/get.html?order=" + order + "&ttl&random=true");
HttpURLConnection connection = (HttpURLConnection)url.openConnection();
connection.setConnectTimeout(3000);
connection = (HttpURLConnection)url.openConnection();
InputStream raw = connection.getInputStream();
InputStream in = new BufferedInputStream(raw);
byte[] data = new byte[in.available()];
int bytesRead = 0;
int offset = 0;
while(offset < data.length) {
bytesRead = in.read(data, offset, data.length - offset);
if(bytesRead == -1) {
break;
offset += bytesRead;
in.close();
raw.close();
String[] res = new String(data, "UTF-8").split("\n");
System.out.println(">>>>>>>>>>>>>>当前返回IP量 " + res.length);
for (String ip : res) {
new Crawler(100, targetUrl, useJs, timeOut, ip, referer, https, outputHeaderInfo).start();
} catch (Exception e) {
System.err.println(">>>>>>>>>>>>>>获取IP出错, " + e.getMessage());
try {
Thread.sleep(sleepMs);
} catch (InterruptedException e) {
e.printStackTrace();
public String joinList(List
list){
StringBuilder re = new StringBuilder();
for (String string : list) {
re.append(string).append(",");
return re.toString();
public String trim(String html) {
if (html != null) {
return html.replaceAll(" ", "").replaceAll("\n", "");
return null;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import java.net.URL;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSession;
/**
 * Tests the data5u dynamic forwarding proxy. The code supports target URLs that use
 * either HTTP or HTTPS, e.g. http://www.example.com and https://www.example.com.
 *
 * @author www.data5u.com
 */
public class TestHttps {

    /**
     * Installs a JVM-wide default SSL socket factory that accepts every certificate.
     *
     * <p>NOTE(review): this disables certificate validation for all
     * {@code HttpsURLConnection}s in the process; acceptable for a proxy demo only.
     */
    private static void trustAllHttpsCertificates() throws Exception {
        javax.net.ssl.TrustManager[] trustAllCerts = new javax.net.ssl.TrustManager[] { new miTM() };
        javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager that accepts every client and server certificate chain. */
    static class miTM implements javax.net.ssl.TrustManager, javax.net.ssl.X509TrustManager {
        @Override
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            // The X509TrustManager contract asks for a non-null (possibly empty) array.
            return new java.security.cert.X509Certificate[0];
        }

        public boolean isServerTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        public boolean isClientTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        @Override
        public void checkServerTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Accept everything.
        }

        @Override
        public void checkClientTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Accept everything.
        }
    }

    /**
     * Starts several threads; each one requests the target URL once through the
     * forwarding proxy and prints the elapsed time, status code and body.
     */
    public static void main(String[] args) {
        // Required when the crawler requests HTTPS URLs: an empty value means that no
        // authentication scheme is disabled for proxying / HTTPS tunnelling.
        System.setProperty("jdk.http.auth.proxying.disabledSchemes", "");
        System.setProperty("jdk.http.auth.tunneling.disabledSchemes", "");

        // Fixed address: tunnel.data5u.com:56789
        final String httpsIpport = "tunnel.data5u.com:56789";
        final String order = "【把这里换成你的IP提取码】"; // user name
        final String pwd = "【把这里换成你的动态转发密码】"; // password
        final String targetUrl = "http://myip.ipip.net/"; // target URL to fetch

        final String[] hostAndPort = httpsIpport.split(":");
        final String proxyHost = hostAndPort[0];
        final int proxyPort = Integer.parseInt(hostAndPort[1]);

        // The three defaults below are JVM-global, so they are installed once here,
        // before any connection is opened, instead of once per worker thread.
        try {
            // Trust all certificates; needed when requesting HTTPS URLs.
            trustAllHttpsCertificates();
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("[ERR]" + "→→→→→" + e.getMessage());
            return;
        }
        HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
            @Override
            public boolean verify(String urlHostName, SSLSession session) {
                return true;
            }
        });
        // Built-in Java proxy authentication; needed when requesting HTTPS URLs.
        Authenticator.setDefault(new Authenticator() {
            @Override
            public PasswordAuthentication getPasswordAuthentication() {
                return new PasswordAuthentication(order, pwd.toCharArray());
            }
        });

        int requestTime = 5;
        for (int i = 0; i < requestTime; i++) {
            final int x = i;
            new Thread(new Runnable() {
                @Override
                public void run() {
                    HttpURLConnection connection = null;
                    try {
                        long startTime = System.currentTimeMillis();

                        URL link = new URL(targetUrl);
                        Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyHost, proxyPort));
                        connection = (HttpURLConnection) link.openConnection(proxy);

                        connection.setRequestMethod("GET");
                        connection.setDoInput(true);
                        connection.setRequestProperty("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36");
                        connection.setUseCaches(false);
                        connection.setConnectTimeout(60000);
                        // Without a read timeout a stalled proxy would block this thread forever.
                        connection.setReadTimeout(60000);
                        connection.connect();

                        // Read the status while the connection is still open.
                        int status = connection.getResponseCode();

                        StringBuilder html = new StringBuilder();
                        try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"))) {
                            String line;
                            while ((line = reader.readLine()) != null) {
                                html.append(line);
                            }
                        }

                        long endTime = System.currentTimeMillis();
                        System.out.println(x + " [OK]" + "→→→→→" + targetUrl + " " + (endTime - startTime) + "ms " + status + " " + html.toString());
                    } catch (Exception e) {
                        e.printStackTrace();
                        System.err.println(x + " [ERR]" + "→→→→→" + e.getMessage());
                    } finally {
                        if (connection != null) {
                            connection.disconnect();
                        }
                    }
                }
            }).start();
        }
    }
}
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.net.Authenticator;
import java.net.PasswordAuthentication;
/**
 * Fetches one URL through a SOCKS5 proxy. Because the SOCKS5 proxy requires
 * password authentication, this demo includes the authentication logic.
 */
public class Socks5IpDemo {

    /**
     * Built-in authenticator that always answers with a fixed user name and password.
     *
     * <p>Declared {@code static} so that the static {@code main} method can create it
     * without an enclosing {@code Socks5IpDemo} instance.
     */
    static class BasicAuthenticator extends Authenticator {
        String userName;
        String password;

        public BasicAuthenticator(String userName, String password) {
            this.userName = userName;
            this.password = password;
        }

        @Override
        protected PasswordAuthentication getPasswordAuthentication() {
            return new PasswordAuthentication(userName, password.toCharArray());
        }
    }

    /** Requests the target URL through the configured proxy and prints the response body. */
    public static void main(String[] args) {
        HttpURLConnection connection = null;
        try {
            String targetUrl = "http://pv.sohu.com/cityjson?ie=utf-8";
            URL link = new URL(targetUrl);

            // Replace this with a working proxy IP; the value here is only an example.
            String ipport = "218.26.204.66:8080";
            String charset = "UTF-8";

            // Configure the proxy.
            String[] hostAndPort = ipport.split(":");
            Proxy proxy = new Proxy(Proxy.Type.SOCKS, new InetSocketAddress(hostAndPort[0], Integer.parseInt(hostAndPort[1])));
            connection = (HttpURLConnection) link.openConnection(proxy);

            // Password authentication; replace the user name and password with real ones.
            Authenticator.setDefault(new BasicAuthenticator("data5u", "123321"));

            connection.setRequestProperty("User-agent", "");
            connection.setRequestProperty("Accept", "*/*");
            connection.setRequestProperty("Accept-Charset", charset);
            connection.setRequestProperty("Referer", "");
            connection.setRequestProperty("Upgrade-Insecure-Requests", "1");
            connection.setRequestProperty("Cookie", "");
            connection.setUseCaches(false);
            // Bound the connect phase as well, so a dead proxy cannot hang the demo.
            connection.setConnectTimeout(10000);
            connection.setReadTimeout(10000);

            int rcode = connection.getResponseCode();
            if (rcode != 200) {
                System.out.println("使用代理IP连接网络失败,状态码:" + rcode);
            } else {
                StringBuilder html = new StringBuilder();
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), charset))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        html.append(line);
                    }
                }
                System.out.println("请求" + targetUrl + ", 得到如下信息:");
                System.out.println(html.toString());
            }
        } catch (Exception e) {
            System.err.println("发生异常:" + e.getMessage());
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }
}
HTTPS代理IP是什么?适用范围有哪些?
① HTTPS代理IP是数据无忧_无忧代理IP_DATA5U提供的高效、稳定的代理IP,具体参考“动态代理IP”。
② HTTPS代理IP的有效期最长为60秒。
③ HTTPS代理IP适用于数据爬虫、大数据业务。
数据无忧_无忧代理IP_DATA5U·专业的代理IP服务商 电话:18210476952
备案号:京ICP备16045418号
ICP经营许可:京B2-20192105
国内互联网虚拟专用网业务许可:B1-20200383
声明:本站资源仅限用来计算机技术学习研究,所有IP都是中国大陆(内地)的机房IP,不支持访问国外网站。
京公网安备 11011402011314号