日期:2014-05-20 浏览次数:21140 次
package modelframeworkdemo;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;

/**
 * Demo that opens a connection to a fixed URL, prints some of its response headers and then
 * dumps the response body one character per line.
 */
public class CSHtml {

    /**
     * Connects to the hard-coded address, prints the content type, content length and the
     * date-related headers, then prints every character of the body as
     * {@code <code point value> <character>}, followed by the number of characters read.
     *
     * <p>Any I/O failure is reported on standard output instead of being propagated.
     */
    void display() {
        try {
            String addr = "http://www.pw.utc.com/vgn-ext-templating/v/PWSearch?keyWord=engine";
            URL url = new URL(addr);

            // openConnection() only creates the connection object;
            // connect() is what actually opens the communication link.
            URLConnection c = url.openConnection();
            c.connect();

            // Header information exposed by URLConnection.
            System.out.println("内容类型: " + c.getContentType());
            System.out.println("内容长度: " + c.getContentLength());
            System.out.println("创建日期: " + new Date(c.getDate()));
            System.out.println("最后修改日期: " + new Date(c.getLastModified()));
            System.out.println("终止日期: " + new Date(c.getExpiration()));

            long charCount = 0;
            // try-with-resources closes the reader and the stream even when a read fails.
            // The single-argument InputStreamReader decodes with the platform default charset.
            try (InputStream is = c.getInputStream();
                    BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
                int msg;
                while ((msg = br.read()) != -1) {
                    System.out.println(msg + " " + (char) msg);
                    charCount++;
                }
            }

            // Reported after the loop: the length is only known once the body has been consumed.
            // This is the number of decoded characters, which equals the byte count only for
            // single-byte encodings.
            System.out.println("字节流长度: " + charCount);
        } catch (IOException e) {
            System.out.println(e);
        }
    }

    /**
     * Runs the demo.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        CSHtml app = new CSHtml();
        app.display();
    }
}
/**
 * Reads the content at the given URL with an HTTP GET request and converts it to an XML document.
 *
 * @param url address to fetch
 * @return the document returned by {@code Sys.loadXML} for the response body
 * @throws Exception if the connection cannot be set up or read, if the response status is not
 *     {@code HttpURLConnection.HTTP_OK}, or if {@code Sys.loadXML} fails
 */
public Document doQuery1(String url) throws Exception {
    URL u = new URL(url);
    HttpURLConnection conn = (HttpURLConnection) u.openConnection();

    // Plain GET: nothing is written to the request body and caches are not used.
    conn.setDoOutput(false);
    conn.setDoInput(true);
    conn.setUseCaches(false);
    conn.setRequestMethod("GET");
    conn.connect();

    int code = conn.getResponseCode();
    if (code != HttpURLConnection.HTTP_OK) {
        throw new Exception("远程没有返回正确结果,返回【" + code + "】。");
    }

    // Debugging hint: to print the raw response instead of parsing it, wrap the stream in a
    // BufferedReader and print it line by line.

    // Normal path: parse the body. try-with-resources makes sure the response stream is closed
    // whether or not parsing succeeds.
    try (InputStream in = conn.getInputStream()) {
        return Sys.loadXML(in);
    }
}
------解决方案--------------------
我并不认为楼主的方法有什么不妥,担心是缓冲区大小的原因,所以,换了个方法读。
别说我BT,这样可以不用考虑编码问题,如果有乱码那就修改控制台的编码和网页编码相同就行了。
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
/**
 * Demo that downloads a fixed URL and copies the raw response bytes to standard output.
 *
 * <p>The bytes are written without being decoded, so the program itself performs no charset
 * conversion.
 */
public class CSHtml {

    /** Size in bytes of the buffer used to copy the response. */
    private static final int BUFFER_SIZE = 102400;

    /**
     * Connects to the hard-coded address and streams the response body to {@code System.out}.
     *
     * <p>Any failure is reported on standard output instead of being propagated.
     */
    void display() {
        try {
            String addr = "http://www.pw.utc.com/vgn-ext-templating/v/PWSearch?keyWord=engine";
            URL url = new URL(addr);
            URLConnection c = url.openConnection();
            c.connect();

            // try-with-resources closes the stream even if a read or write fails part-way.
            try (InputStream is = c.getInputStream()) {
                byte[] b = new byte[BUFFER_SIZE];
                int n;
                // read() returns the number of bytes placed in the buffer, or -1 at end of stream.
                while ((n = is.read(b, 0, b.length)) != -1) {
                    System.out.write(b, 0, n);
                }
            } finally {
                // Raw write() calls were used, so push out whatever has been buffered so far.
                System.out.flush();
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }

    /**
     * Runs the demo.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        CSHtml app = new CSHtml();
        app.display();
    }
}