// Date: 2014-05-16    Views: 20351
package test
import org.jsoup.nodes.Document
import java.util.HashMap
import org.jsoup.Jsoup
/**
 * One rental listing: a title, a link, a price, a house type and a date.
 *
 * All fields are mutable and can be reassigned after construction.
 *
 * Author: fuliang
 * http://fuliang.iteye.com
 *
 * @param title     listing title
 * @param link      link associated with the listing
 * @param price     listing price (boxed integer)
 * @param houseType house type text
 * @param date      date text
 */
class HouseEntry(
    var title: String,
    var link: String,
    var price: Integer,
    var houseType: String,
    var date: String
) {

  /** Renders every field on a single tab-separated line terminated by a newline. */
  override def toString(): String = {
    val template = "title: %s\tlink:%s\tprice:%d\thouseType:%s\tdate:%s\n"
    template.format(title, link, price, houseType, date)
  }
}
/**
 * Requests a rental search-result page with jsoup and converts its table rows
 * into [[HouseEntry]] values.
 */
class HouseRentCrawler {

  /**
   * Runs a keyword search restricted to a price range and parses the result page.
   *
   * @param url       address of the search page to request
   * @param keyword   search term, sent as the `key` request parameter
   * @param lowRange  lower bound of the price range
   * @param highRange upper bound of the price range
   * @return the listings parsed from the page; rows are prepended while parsing,
   *         so the last row of the page is the first element of the list
   */
  def crawl(url: String, keyword: String, lowRange: Int, highRange: Int): List[HouseEntry] =
    extract(fetch(url, keyword, lowRange, highRange))

  /**
   * Sends a GET request to `url` carrying the search parameters and returns the
   * parsed document.
   */
  private def fetch(url: String, keyword: String, lowRange: Int, highRange: Int): Document = {
    val params = new HashMap[String, String]()
    // NOTE(review): "final", "jump" and "searchtype" are fixed values whose meaning
    // is defined by the target site and is not visible here — confirm before changing.
    params.put("final", "1")
    params.put("jump", "2")
    params.put("searchtype", "3")
    params.put("key", keyword)
    // The price range is sent as a single "<low>_<high>" value.
    params.put("MinPrice", s"${lowRange}_$highRange")

    Jsoup.connect(url)
      .data(params)
      .userAgent("Mozilla")
      .timeout(10000) // milliseconds, per jsoup's Connection.timeout
      .get()
  }

  /**
   * Builds one [[HouseEntry]] per usable row matched by `#infolist > tr:not(.dev)`.
   *
   * Rows that cannot be turned into an entry (see `parseRow`) are skipped so that a
   * single malformed row does not abort the whole crawl.
   */
  private def extract(doc: Document): List[HouseEntry] = {
    val rows = doc.select("#infolist > tr:not(.dev)")
    // Prepending keeps the historical result order: last row of the page first.
    (0 until rows.size()).foldLeft(List.empty[HouseEntry]) { (acc, i) =>
      parseRow(rows.get(i)) match {
        case Some(entry) => entry :: acc
        case None        => acc
      }
    }
  }

  /**
   * Reads title and link from the first cell, price from the second, house type
   * from the third and date from the fourth.
   *
   * @return `None` when the row has fewer than four `td` cells or when the price
   *         cell does not contain a plain integer
   */
  private def parseRow(row: org.jsoup.nodes.Element): Option[HouseEntry] = {
    val cells = row.select("td")
    if (cells.size() < 4) None
    else
      parsePrice(cells.get(1).text()).map { price =>
        new HouseEntry(
          cells.get(0).text(),
          cells.get(0).select("a[class=t]").attr("href"),
          price,
          cells.get(2).text(),
          cells.get(3).text()
        )
      }
  }

  /** Parses `text` as an integer, yielding `None` instead of throwing on bad input. */
  private def parsePrice(text: String): Option[Int] =
    try Some(text.trim.toInt)
    catch { case _: NumberFormatException => None }
}
/** Command-line entry point: runs one fixed search and prints every listing found. */
object HouseRentCrawler {

  /**
   * Crawls the hard-coded search page for the hard-coded keyword and price range
   * (2000 to 3500) and prints each resulting entry to standard output.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val url = "http://bj.58.com/zufang"
    val crawler = new HouseRentCrawler()
    val houseEntries = crawler.crawl(url, "智学苑", 2000, 3500)
    houseEntries.foreach(println)
  }
}