最近身边的朋友都在看房子,每天沉浸在各大房价网站中,看了几天后和我抱怨,还是对杭州整体房价的高低没有一个具体的概念。优秀且敏感的我听到后,仿佛闻到了一丝需求的味道:既然他们拥有了这么优秀的我,怎么能让他们一直这么看房!
完成效果如下:
憋说了!大家的房价由我来守护,是时候拿出自己吃饭的家伙了。
很好,我们基于 nuxt 把基本骨架搭建出来,然后添加我们需要的文件,最终的整个项目结构如下:
万事开头难,我们首先要优化一下 nuxt 生成的 server/index.js。
代码如下:
import Koa from 'koa';
import { Nuxt, Builder } from 'nuxt';
import R from 'ramda';
import { resolve } from 'path'
// Import and set Nuxt.js options.
const config = require('../nuxt.config.js')
// `process.env` is an object, so comparing it with the string 'production'
// was always false and forced dev mode on. The mode is carried by NODE_ENV.
config.dev = process.env.NODE_ENV !== 'production'
// Address and port the server listens on; HOST / PORT override the defaults.
const host = process.env.HOST || '127.0.0.1'
const port = process.env.PORT || 4000
// Names of the middleware modules loaded from ./middlewares, in this order.
const MIDDLEWARES = ['database','crawler','router']
// Resolve a path relative to the directory of this file.
const r = path => resolve(__dirname, path)
/**
 * Wraps a Koa application: applies the middleware modules listed in
 * MIDDLEWARES on construction, and serves Nuxt-rendered pages once started.
 */
class Server {
  constructor () {
    const koa = new Koa()
    this.app = koa
    const loadAll = this.useMiddleWares(koa)
    loadAll(MIDDLEWARES)
  }

  /**
   * Build a loader for middleware modules.
   *
   * @param app - application instance handed to every export of each module
   * @returns a function that takes a list of module names, requires each one
   *          from ./middlewares and calls every export of the module with `app`
   */
  useMiddleWares (app) {
    const toModulePath = name => `${r('./middlewares')}/${name}`
    const applyExports = R.map(register => register(app))
    return R.map(R.compose(applyExports, require, toModulePath))
  }

  /**
   * Instantiate Nuxt, build it when running in dev mode, install the render
   * middleware and start listening on host:port.
   */
  async start () {
    const nuxt = new Nuxt(config)

    // Build in development only.
    if (config.dev) {
      await new Builder(nuxt).build()
    }

    const renderWithNuxt = async (ctx, next) => {
      await next()
      ctx.status = 200 // koa defaults to 404 when it sees that status is unset
      return new Promise((done, fail) => {
        // Settle as soon as the raw response is closed or fully written.
        ctx.res.on('close', done)
        ctx.res.on('finish', done)
        nuxt.render(ctx.req, ctx.res, pending => {
          pending.then(done).catch(fail)
        })
      })
    }

    this.app.use(renderWithNuxt)
    this.app.listen(port, host)
    console.log(`Server listening on ${host}:${port}`) // eslint-disable-line no-console
  }
}
// Create the server and start it. start() is async, so a failed Nuxt build or
// startup error is reported and the process exits with a non-zero code instead
// of leaving the rejection unhandled.
const app = new Server();
app.start().catch(err => {
  console.error(err) // eslint-disable-line no-console
  process.exit(1)
})
复制代码
代码如下:
// Entry script: registers Babel (presets, decorators and module aliases),
// loads the polyfill, then loads the server entry point.
const { resolve } = require('path')

// Resolve a path relative to the directory of this script.
function r (path) {
  return resolve(__dirname, path)
}

// Import aliases: '~' maps to ./server and 'database' to ./server/database.
const aliases = [
  { src: r('./server'), expose: '~' },
  { src: r('./server/database'), expose: 'database' }
]

const presets = [
  'stage-3',
  ['latest-node', { target: 'current' }]
]

const plugins = [
  'transform-decorators-legacy',
  ['module-alias', aliases]
]

require('babel-core/register')({ presets, plugins })
require('babel-polyfill')
require('./server/index')
复制代码
前面的铺垫都准备好了,那我们就可以愉快地宰鸡了~~~
按照思路,我们现在开始动手爬了。
拿出我们的神器:
import cheerio from 'cheerio' //node里的jquery,帮助咱们解析页面
复制代码
下面举出一个文件的例子:
import cheerio from 'cheerio'
import rp from 'request-promise'
import R from 'ramda'
import _ from 'lodash'
import { writeFileSync } from 'fs'
import { resolve } from 'path';
/**
 * Pause for `time` milliseconds. The returned promise resolves with no value.
 */
const sleep = time => new Promise(wake => {
  setTimeout(wake, time)
})
// Listings accumulated across the pages crawled so far.
let _house = []
// Identifier of the area currently being crawled.
let _area = ''
// Running union of every areaDetail list that has been visited.
let _areaDetail = []
// Captures the text between the first "[" and the last "]" of an address.
const AREA_TAG = /[^\[]*\[(.*)\][^\]]*/

/**
 * Clean up one scraped listing without mutating it.
 *
 * - huxing: all whitespace removed
 * - aroundPrice: whitespace removed, then the text after "价" up to one
 *   character before the last "/" (presumably dropping the unit character
 *   that precedes the slash — confirm against the live page)
 * - area: raw text inside the "[...]" tag of the address, or '' when the
 *   address carries no tag (the original threw a TypeError in that case)
 * - adress: the text after the last "]" with whitespace removed
 */
const normalizeHouse = house => {
  const huxing = house.huxing.replace(/\s+/g, '')
  const priceText = house.aroundPrice.replace(/\s+/g, '')
  const priceStart = priceText.indexOf('价') + 1
  const priceEnd = priceText.lastIndexOf('/') - 1
  const aroundPrice = priceText.slice(priceStart, priceEnd)
  const areaMatch = house.adress.match(AREA_TAG)
  const adress = house.adress
    .slice(house.adress.lastIndexOf(']') + 1)
    .replace(/\s+/g, '')
  return Object.assign({}, house, {
    adress,
    huxing,
    aroundPrice,
    area: areaMatch ? areaMatch[1] : ''
  })
}

/**
 * Crawl the new-home listing pages of one area, starting at `page` and
 * following the "next page" link until there is none. Results are appended to
 * the module-level `_house` list, which callers may reset between areas.
 *
 * @param {number} [page=1] - page number to start from
 * @param {string} [area=''] - area path segment inserted into the listing URL
 * @returns {Promise<Array>} the accumulated `_house` list after the last page
 */
export const gethouse = async (page = 1, area = '') => {
  const options = {
    uri: `https://hz.fang.anjuke.com/loupan/${area}/p${page}/`,
    transform: body => cheerio.load(body),
  }
  console.log("正在爬"+options.uri);
  const $ = await rp(options)

  const scraped = []
  // A regular function is required here: cheerio exposes the element as `this`.
  $(".key-list .item-mod").each(function () {
    const item = $(this)
    scraped.push({
      name: item.find(".infos .lp-name .items-name").text(),
      huxing: item.find(".huxing").text(),
      favorPos: item.find(".favor-pos .price-txt").text(),
      aroundPrice: item.find(".favor-pos .around-price").text(),
      adress: item.find(".address .list-map").text()
    })
  })

  // Drop listings with any missing field, then normalise the rest.
  const isComplete = house =>
    house.name && house.adress && house.huxing && house.favorPos && house.aroundPrice
  const cleaned = scraped.filter(isComplete).map(normalizeHouse)
  _house = _.union(_house, cleaned)

  if (!$('.next-page').attr('href')) {
    console.log("爬完了!"+_house.length)
    return _house
  }

  console.log(`${area}共有${_house.length}条数据`)
  await sleep(1000); // throttle between page requests
  // Keep crawling the SAME area (the original passed the module-global _area
  // here) and return the result so the caller receives the full list.
  return gethouse(Number(page) + 1, area)
}
// With the sub-areas of each district known, crawl the listings of every sub-area.
/**
 * Read AreaDetail.json, crawl every sub-area listed under each entry's
 * `areaDetail`, attach the non-empty results as `house` on the sub-area, and
 * write the enriched structure to detailHouse.json.
 */
export const getAreaDetail = async () => {
  const regions = require(resolve(__dirname, '../database/json/AreaDetail.json'))

  for (const region of regions) {
    const subAreas = region['areaDetail']
    _areaDetail = _.union(_areaDetail, subAreas)

    for (const subArea of subAreas) {
      // Start each sub-area with an empty accumulator; gethouse appends to it.
      _house = []
      console.log(`正在爬取${subArea.text}`)
      _area = subArea._id
      console.log(_area)
      await gethouse(1, _area)
      if (_house.length > 0) {
        subArea['house'] = _house
      }
    }
  }

  const serialized = JSON.stringify(regions, null, 2)
  writeFileSync("./server/database/json/detailHouse.json", serialized, 'utf-8')
}
复制代码
代码如下:
/**
 * Database middleware: loads every crawler module. The crawl calls themselves
 * are left commented out and are enabled by hand when data is needed.
 *
 * @param app - application instance supplied by the caller; unused in this body
 */
export const database = async app => {
  // Load every crawler module in one place.
  const crawlers = {
    area: require('../crawler/area'),
    house: require('../crawler/house'),
    areaHouse: require('../crawler/areaHouse'),
    detailhouse: require('../crawler/detailHouse')
  }

  // If the JSON files do not exist locally, uncomment the matching call(s)
  // below to crawl the data.
  // await crawlers.area.getarea()
  // await crawlers.area.getAreaDetail()
  // await crawlers.house.gethouse()
  // await crawlers.areaHouse.getAreaDetail()
  // await crawlers.detailhouse.getAreaDetail()
}
复制代码
代码如下:
根目录nuxt.config.js
/*
** Nuxt configuration: page head metadata, global CSS, loading-bar colour,
** build options and plugins.
*/
module.exports = {
/*
** Headers of the page
*/
head: {
title: 'starter',
meta: [
{ charset: 'utf-8' },
{ name: 'viewport', content: 'width=device-width, initial-scale=1' },
{ hid: 'description', name: 'description', content: 'Nuxt.js project' }
],
link: [
{ rel: 'icon', type: 'image/x-icon', href: '/favicon.ico' }
]
},
/*
** Global CSS
*/
css: ['~static/css/main.css'],
/*
** Customize the progress-bar color
*/
loading: { color: '#3B8070' },
/*
** Build configuration
*/
build: {
/*
** Webpack extension hook. The ESLint-on-save rule below is commented out,
** so this hook currently changes nothing.
*/
extend (config, ctx) {
// if (ctx.isClient) {
// config.module.rules.push({
// enforce: 'pre',
// test: /\.(js|vue)$/,
// loader: 'eslint-loader',
// exclude: /(node_modules)/
// })
// }
},
// NOTE(review): this path (and the one in `plugins` below) is spelled "echat",
// while the accompanying text names the plugin file plugins/echart.js —
// confirm which spelling matches the file on disk.
vendor: ['~/plugins/echat']
},
plugins: ['~/plugins/echat']
}
复制代码
plugins/echart.js
import Vue from 'vue'
import echarts from 'echarts'
// Expose the echarts module on Vue.prototype so components can use `this.$echarts`.
Object.assign(Vue.prototype, { $echarts: echarts })
复制代码
page/minHouse.vue
<template>
<div>
<section class="container">
<a @click="turnBack" class="back">返回</a>
<div id="myChart" :style="{width: 'auto', height: '300px'}"></div>
</section>
</div>
</template>
<script>
import { mergeSort } from '../util/index'
import Footer from '../components/layouts/Footer'
import Header from '../components/layouts/Header'
import {
getAreaList,
getAreaHouseList,
getDetailList
} from '../serverApi/area'
export default {
name: 'hello',
data() {
return {
xAxis: [], //x轴的数据
rate: [], //y轴的数据
AreaHouse: [], //所有数据
myChart:'', //chart
_id:[],
detail:[]
}
},
created() {
this.getAreaHouse()
},
mounted() {
/**
*基于准备好的dom,初始化echarts实例
*/
this.myChart = this.$echarts.init(document.getElementById('myChart'))
this.clickBar()
},
methods: {
/**
* 返回逻辑
*/
turnBack(){
this.formateData(this.AreaHouse);
this.drawLine()
},
/**
* 点击bar的交互
*/
clickBar(){
let that = this
this.myChart.on('click',function(params){
...
})
},
/**
*获取小区域内房价
*/
async getDetail({param}){
await getDetailList(param).then((data)=>{
if(data.code === 0){
this.detail = data.area.areaDetail;
this.formateData(this.detail);
this.drawLine()
}
})
},
/**
*获取大区域的房价
*/
async getAreaHouse(){
await getAreaHouseList().then((data)=>{
if(data.code === 0){
this.AreaHouse = data.areaHouse;
this.formateData(this.AreaHouse);
this.drawLine()
}
})
},
/**
* 数据处理,对数据里的价格排序
*/
formateData(data) {
let textAry = [],_id=[],rate=[];
for (let i = 0; i < data.length; i++) {
textAry.push(data[i]['text'])
_id.push(data[i]['_id'])
let sortAry = mergeSort(data[i]['house'])
data[i]['house'] = sortAry
rate.push(sortAry[0]['aroundPrice'])
}
this.xAxis = textAry
this._id = _id
this.rate = rate
},
drawLine() {
/**
* 绘制图表
*/
...
},
components:{
'my-footer': Footer,
'my-header': Header
}
}
</script>
复制代码
到这里,我们这个项目完成一半了,剩下就是路由的提取、接口的定义和 json 数据的入库。休息一下,优秀的你看到(做到)这里,简直要为你鼓掌。不如。。。
啊哈哈哈哈哈哈哈哈哈哈哈哈~