今天遇到一个需求:读取一个动态 HTML 页面(数据通过 Ajax 获取并填充)的源代码,然后使用 Node.js 的 npm 进行编译。获取源码的最佳方式是使用 HttpClient,但为了尽快验证这个需求的流程,下文先用 Java 原生 API(HttpURLConnection)来实现:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Minimal page-source downloader built on {@link HttpURLConnection}.
 *
 * <p>Both {@code getHtmlContent} overloads report failure by returning
 * {@code null}; diagnostics are printed to standard output / standard error
 * rather than thrown to the caller.
 */
public class Test {

    /** Connect and read timeout applied to every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 60000;

    /** Size of the character buffer used while draining the response body. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Downloads the body of {@code url} and decodes it with the given charset.
     *
     * <p>The body is returned exactly as received, including its line
     * terminators, so that line-sensitive content (for example {@code //}
     * comments in inline scripts) keeps its meaning.
     *
     * @param url    the HTTP or HTTPS address to fetch
     * @param encode the charset name used to decode the response body; must not
     *               be {@code null}
     * @return the decoded response body, or {@code null} when {@code url} is
     *         {@code null}, the URL does not open an HTTP connection, the
     *         response code is {@code -1} or {@code >= 400}, or any
     *         {@link IOException} occurs (including an unsupported charset name)
     */
    public static String getHtmlContent(URL url, String encode) {
        if (url == null) {
            return null;
        }
        HttpURLConnection con = null;
        try {
            java.net.URLConnection opened = url.openConnection();
            // Non-HTTP schemes (file:, jar:, ...) cannot be cast below; treat
            // them as a failed connection instead of throwing.
            if (!(opened instanceof HttpURLConnection)) {
                System.out.println(url.toString()
                        + " : connection is failure...");
                return null;
            }
            con = (HttpURLConnection) opened;
            // Identify as an Internet Explorer browser when downloading.
            con.setRequestProperty("User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
            con.setConnectTimeout(TIMEOUT_MILLIS);
            con.setReadTimeout(TIMEOUT_MILLIS);

            // Fetch the HTTP status code; -1 means no valid status line was received.
            int responseCode = con.getResponseCode();
            if (responseCode == -1) {
                System.out.println(url.toString()
                        + " : connection is failure...");
                return null;
            }
            if (responseCode >= 400) { // request failed
                System.out.println("请求失败:get response code: " + responseCode);
                return null;
            }
            return readAll(con.getInputStream(), encode);
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("error: " + url.toString());
            return null;
        } finally {
            // con is still null when openConnection() itself failed.
            if (con != null) {
                con.disconnect();
            }
        }
    }

    /**
     * Convenience overload taking the address as text.
     *
     * <p>If {@code url} starts with neither {@code http://} nor
     * {@code https://} (compared case-insensitively), {@code http://} is
     * prepended before the address is parsed.
     *
     * @param url    the address to fetch, with or without a scheme
     * @param encode the charset name used to decode the response body
     * @return the decoded response body, or {@code null} when {@code url} is
     *         {@code null}, cannot be parsed, or the download fails for any
     *         reason (every exception is caught and its stack trace printed)
     */
    public static String getHtmlContent(String url, String encode) {
        if (url == null) {
            return null;
        }
        if (!startsWithIgnoreCase(url, "http://")
                && !startsWithIgnoreCase(url, "https://")) {
            url = "http://" + url;
        }
        try {
            URL rUrl = new URL(url);
            return getHtmlContent(rUrl, encode);
        } catch (Exception e) {
            // Deliberately broad: this entry point never throws to its caller.
            e.printStackTrace();
            return null;
        }
    }

    /** Fetches a sample page and prints the result to standard output. */
    public static void main(String argsp[]) {
        System.out.println(getHtmlContent("www.k-run.cn", "utf-8"));
    }

    /** Reads {@code in} to the end using charset {@code encode}, always closing the stream. */
    private static String readAll(InputStream in, String encode)
            throws IOException {
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(in, encode));
            StringBuilder content = new StringBuilder();
            char[] buffer = new char[BUFFER_SIZE];
            int read;
            // Copy raw characters instead of using readLine(), which would
            // strip the line terminators from the page source.
            while ((read = reader.read(buffer, 0, buffer.length)) != -1) {
                content.append(buffer, 0, read);
            }
            return content.toString();
        } finally {
            in.close();
        }
    }

    /** Reports whether {@code text} begins with {@code prefix}, ignoring case. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }
}

–EOF–