需求:给定近百万小区名称及对应城市信息,通过高德获取其周边设施信息(交通、医疗、教育、生活设施)
高德JS API
爬取灵感来自高德开放平台提供的JS API。当然你也可以直接调用高德提供的Web服务API获取地址信息,但是Web服务API有调用次数限制;而JS API的调用并没有次数限制。经我的验证,单个爬取节点把抓取间隔控制在500毫秒左右时不会被限制。
https://lbs.amap.com/api/javascript-api/example/poi-search/keywords-search
高德地址信息获取原理
通过JS引入高德的AMap对象,调用相关方法获取对应数据。JS调用高德地图显示周边信息也是这个逻辑。
准备工作
注册高德地图账号->控制台创建应用->申请Key
需要申请Key才可以在自己js代码中使用高德js
数据抓取
数据服务API:
读取小区数据放入list中,每来一次请求就从list中pop一条数据返回,这样前端可以多个节点、多个任务同时抓取。
数据获取:
要想使用高德,需引入如下2个js文件,其中的key即为在高德官网申请的key:
<!-- jQuery, loaded from AMap's static host -->
<script src="http://cache.amap.com/lbs/static/jquery-1.9.1.js"></script>
<!-- AMap JS API v1.3 with the listed plugins; put your own AMap key in place of the masked value -->
<script src="http://webapi.amap.com/maps?v=1.3&key=************&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete" id="amap_key"></script>
get请求获取小区数据,创建高德对象
// Fetch the next community record from the local data API, then create the
// AMap map object and start the lookup for that community.
$.ajax({
  type: "get",
  url: "http://127.0.0.1:5075/api/community",
  dataType: "json",
  contentType: "application/json",
  success: function (data) {
    // `params` and `MAP` are intentionally page-level globals: the other
    // functions on the page read them.
    params = data;
    console.log('Getapi data', data);
    MAP = new AMap.Map("container", {
      resizeEnable: true
    });
    getAddress(data);
  },
  error: function (xhr, textStatus, errorThrown) {
    // Surface a failed fetch instead of failing silently.
    console.log('Getapi failed:', textStatus, errorThrown);
  }
});
调用高德JS搜索小区信息:
1.在AMap.service方法中构造地点查询类
2.调用地点查询类的search方法返回结果result
3.使用返回结果中的小区经纬度调用searchNearBy去获取周边数据
// Look up the community by keyword, then query the facilities around it.
// Relies on `city`, `keyword`, `MAP` and `searchNearBy` from the surrounding page.
AMap.service(["AMap.PlaceSearch"], function () {
  // Build the place-search object.
  var placeSearch = new AMap.PlaceSearch({
    pageSize: 3,
    pageIndex: 1,
    city: city, // city to search in
    map: MAP,
    type: "商务住宅|门牌信息",
    citylimit: true,
    extensions: "all"
  });

  // Keyword search.
  placeSearch.search(keyword, function (status, result) {
    console.log('getAddress info', result && result.info, result);

    // Only a 'complete' status carries a POI list; anything else means the
    // lookup failed or returned nothing.
    var pois = result && result.poiList && result.poiList.pois;
    if (status !== 'complete' || !pois || pois.length === 0 || !pois[0].location) {
      console.log(keyword + ' not find in amap');
      return;
    }

    // Use the coordinates of the first hit as the centre of the nearby search.
    var cpoint = [pois[0].location.lng, pois[0].location.lat];
    searchNearBy(city, cpoint, keyword);
  });
});
前端打印的搜索结果,获取的信息非常详细包括省市县及其编码,以及经纬度信息等:
完整代码
将代码保存至index.html,在同级目录下用python在本地启动http服务(Python 2:python -m SimpleHTTPServer 8000;Python 3:python -m http.server 8000)。
浏览器请求:http://localhost:8000?key=***************************
<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="initial-scale=1.0, user-scalable=no, width=device-width">
  <title>高德房源地址获取</title>
  <script src="http://cache.amap.com/lbs/static/jquery-1.9.1.js"></script>
  <!-- The AMap script is injected by the script below because its key is taken from the page URL (?key=...). -->
  <script type="text/javascript">
    /**
     * Decode one URL-encoded query component.
     * Returns the raw text when it is not valid percent-encoding, so a
     * malformed URL cannot abort page start-up.
     */
    function safeDecode(text) {
      try {
        return decodeURIComponent(text);
      } catch (e) {
        return text;
      }
    }

    /**
     * Parse the query string of the current page URL.
     *
     * Each "name=value" pair is split first and decoded afterwards, so an
     * encoded "&" or "=" inside a value does not corrupt the result. Only the
     * first "=" separates name from value.
     *
     * @returns {Object} map of parameter name to decoded value (empty when
     *   the URL has no query string).
     */
    function GetRequest() {
      var theRequest = {};
      var query = location.search; // the part of the URL starting at "?"
      if (query.indexOf("?") === -1) {
        return theRequest;
      }
      var pairs = query.substr(1).split("&");
      for (var i = 0; i < pairs.length; i++) {
        var pair = pairs[i];
        if (pair === "") {
          continue;
        }
        var eq = pair.indexOf("=");
        var name = eq === -1 ? pair : pair.substring(0, eq);
        var value = eq === -1 ? "" : pair.substring(eq + 1);
        theRequest[safeDecode(name)] = safeDecode(value);
      }
      return theRequest;
    }

    var params = GetRequest();
    var key = params.key;
    if (!key) {
      console.log('missing "key" query parameter; the AMap script will not authenticate');
    }

    // Build the AMap script URL dynamically. The key comes from the URL, so it
    // is encoded and the src attribute is quoted.
    var mapSrc = 'http://webapi.amap.com/maps?v=1.3&key=' + encodeURIComponent(key) +
      '&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete';
    console.log(mapSrc);
    // The closing tag is split so the HTML parser does not end this script early.
    document.write('<script src="' + mapSrc + '"></sc' + 'ript>');
  </script>
</head>
<body id='body'>
  <div id="container"></div>
  <script type="text/javascript">
    // Radius, in metres, of the nearby-facility search around a community.
    var NEARBY_RADIUS_METERS = 5000;
    // Pause, in milliseconds, between consecutive AMap requests.
    var REQUEST_INTERVAL_MS = 400;

    // Page-level state shared by the functions below.
    var MAP;            // AMap.Map instance, created once the community record arrives
    var save_data = {}; // record being assembled for the current community

    // NOTE(review): `flag_over` and `flagSuccess` are set on `window` but never
    // read in this page - presumably an external driver polls them; confirm.

    /**
     * Upload the collected record to the local storage service and mark the
     * task as finished shortly afterwards.
     *
     * @param {Object} save_data record holding the community and its nearby POIs.
     */
    function send_mongodb(save_data) {
      // `params` is the global holding the community record fetched from the data API.
      var save_url = decodeURIComponent(location.href) + '&' + params.id;
      console.log('amap_data done:', save_data, save_url);

      // Kept in a local variable so the global `params` is not overwritten.
      var payload = {
        "_template": "amap_gps_nearby",
        "_title": save_data.community + "周边信息",
        "_timestamp": Date.parse(new Date()),
        "_hostname": '',
        "_url": save_url,
        "_data": {
          "data": save_data
        },
        "_timestampstr": new Date()
      };

      $.ajax({
        type: "POST",
        url: "http://127.0.0.1:5125/upload",
        dataType: "json",
        contentType: "application/json",
        data: JSON.stringify(payload),
        success: function (data) {
          console.log('POST mongodb:', data);
        },
        error: function (xhr, textStatus, errorThrown) {
          console.log('POST mongodb failed:', textStatus, errorThrown);
        }
      });

      setTimeout(function () {
        window.flag_over = true;
      }, REQUEST_INTERVAL_MS);
    }

    /**
     * Choose the POI that best matches the requested community.
     * Prefers the first POI whose district and name both overlap the
     * requested ones; otherwise falls back to the first search hit.
     */
    function pickBestPoi(pois, district, xiaoqu) {
      for (var i = 0; i < pois.length; i++) {
        var item = pois[i];
        if (item.adname && district &&
            (item.adname.indexOf(district) !== -1 || district.indexOf(item.adname) !== -1) &&
            (xiaoqu.indexOf(item.name) !== -1 || item.name.indexOf(xiaoqu) !== -1)) {
          return item;
        }
      }
      return pois[0];
    }

    /**
     * Look up a community in AMap and, when found, start the nearby search.
     *
     * @param {Object} params community record; the fields read are `city`,
     *   `district`, `xiaoqu` (community name) and `xiaoqu_code`.
     */
    function getAddress(params) {
      var city = params.city;
      var district = params.district;
      var keyword = params.xiaoqu;
      var xiaoqu_code = params.xiaoqu_code;
      var xiaoqu = keyword;
      console.log('---------------------', city, keyword);

      AMap.service(["AMap.PlaceSearch"], function () {
        // Build the place-search object.
        var placeSearch = new AMap.PlaceSearch({
          pageSize: 3,
          pageIndex: 1,
          city: city, // city to search in
          map: MAP,
          type: "商务住宅|门牌信息",
          citylimit: true,
          extensions: "all"
        });

        // Keyword search.
        placeSearch.search(keyword, function (status, result) {
          // AMap reports the outcome as a string status, so a numeric
          // comparison can never detect a failed request.
          if (status === 'error') {
            console.log('请求失败');
            window.flag_over = true;
            return;
          }
          console.log('getAddress info', result && result.info, result);

          var pois = (keyword !== '' && result && result.poiList && result.poiList.pois) || [];
          var detail = pois.length > 0 ? pickBestPoi(pois, district, xiaoqu) : null;
          if (!detail || !detail.location) {
            window.flag_over = true;
            console.log(keyword + ' not find in amap');
            return;
          }

          var cpoint = [detail.location.lng, detail.location.lat];
          save_data = {
            "city": city,
            "district": district,
            "community": xiaoqu,
            "community_code": xiaoqu_code,
            "detail": detail
          };
          searchNearBy(city, cpoint, keyword);
        });
      });
    }

    /**
     * Query the facilities around a point, one category at a time, then
     * upload the combined result.
     *
     * @param {string} city city used to restrict the search.
     * @param {Array} cpoint centre of the search as [lng, lat].
     * @param {string} keyword community name (only logged here).
     */
    function searchNearBy(city, cpoint, keyword) {
      console.log('searchNearBy:', city, cpoint, keyword);

      // Build the place-search object; its type is set per category below.
      var placeSearchNearBy = new AMap.PlaceSearch({
        pageSize: 5,
        type: '',
        pageIndex: 1,
        city: city, // city to search in
        citylimit: true,
        map: MAP
      });

      var nearby = {};
      // Each entry maps a result key to the AMap type string searched for it.
      var types = [
        {'subway': '地铁站'},
        {'bus': '公交车站'},
        {'parking_lot': '停车场'},
        {'other_traffic': '飞机场|火车站'},
        {'kids_school': '幼儿园'},
        {'primary_school': '小学'},
        {'middle_school': '初中'},
        // NOTE(review): the key says high school but the type string means
        // higher-education institutions - confirm which one is intended.
        {'high_school': '高等院校'},
        {'college': '大学'},
        {'train_school': '培训机构'},
        {'library': '图书馆'},
        {'science_museum': '科技馆'},
        {'AAA_hospital': '三级甲等医院'},
        {'special_hospital': '专科医院'},
        {'hospital': '综合医院'},
        {'clinic': '诊所'},
        {'pharmacy': '医药保健销售店'},
        {'shopping': '商场'},
        {'store': '便民商店|便利店'},
        {'supermarket': '超级市场'},
        {'comprehensive_market': '综合市场'},
        {'bank': '银行'},
        {'ATM': 'ATM'},
        {'food': '中餐厅|外国餐厅|快餐厅'},
        {'drink': '咖啡厅|茶艺馆|冷饮店|甜品店'},
        {'park': '公园'},
        {'movies': '电影院'},
        {'sports': '运动场馆'},
        {'entertainment': '娱乐场所'},
        {'gym': '健身中心'}
      ];

      Search(placeSearchNearBy, types);

      /**
       * Take one category off the end of `types`, search it, then schedule
       * the next category after a pause. When no category is left, attach the
       * collected results to the record and upload it.
       */
      function Search(placeSearchNearBy, types) {
        if (types.length === 0) {
          window.flagSuccess = true;
          save_data['nearby'] = nearby;
          send_mongodb(save_data);
          return;
        }

        var item = types.pop();
        var name;
        var type;
        for (var prop in item) {
          name = prop;
          type = item[prop];
        }

        placeSearchNearBy.setType(type);
        placeSearchNearBy.searchNearBy('', cpoint, NEARBY_RADIUS_METERS, function (status, result) {
          // A failed or empty search is recorded as an empty list for the category.
          if (result && result.poiList && result.poiList.pois.length > 0) {
            nearby[name] = result.poiList.pois;
          } else {
            nearby[name] = [];
          }
          setTimeout(function () {
            Search(placeSearchNearBy, types);
          }, REQUEST_INTERVAL_MS);
        });
      }
    }

    // Reload the page every ten minutes.
    setInterval(function () {
      location.reload();
    }, 1000 * 60 * 10);

    // Fetch one community record from the local data API and process it.
    $.ajax({
      type: "get",
      url: "http://127.0.0.1:5075/api/community",
      dataType: "json",
      contentType: "application/json",
      success: function (data) {
        if (!data) {
          console.log('Getapi returned no data');
          return;
        }
        params = data;
        console.log('Getapi data', data);
        // Wait before creating the map and starting the lookup.
        setTimeout(function () {
          MAP = new AMap.Map("container", {
            resizeEnable: true
          });
          getAddress(data);
        }, REQUEST_INTERVAL_MS * 10);
      },
      error: function (xhr, textStatus, errorThrown) {
        console.log('Getapi failed:', textStatus, errorThrown);
      }
    });
  </script>
</body>
</html>