概要:
這篇博文主要講一下如何使用 PhantomJS 進行數據抓取,這裡面抓的網站是太平洋電腦網估價的內容。主要是對電腦筆記本以及它們的屬性進行抓取,然後再使用 Node.js 進行下載圖片和插入數據庫操作。
先對所有頁面的內容進行抓取:
var page =require('webpage').create(); var address='http://product.pconline.com.cn/server/'; var fs = require('fs'); var mypath = 'version/server/server.txt'; var count = 2; var pageSize=0; phantom.outputEncoding="gbk"; page.settings.userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"; function loadController(status){ loadComputerList(address); } function loadComputerList(url){ console.log('loading '+url); page.onLoadFinished = function loadListsucc(status){ console.log("loadlistSucc ["+url+"] =======================Status:"+status); }; page.open(url,function(status){ setTimeout(function(){ console.log(status); var content=''; content = page.evaluate(function(){ var cont=''; var listComputer = document.querySelectorAll('div.item-title>h3>a'); var listPrice =document.querySelectorAll('div.price'); for(var j=0;jvar computer = listComputer[j].innerText; &n