概要:
這篇博文主要講一下如何使用 PhantomJS 進行數據抓取,這裡抓取的網站是太平洋電腦網估價的內容。主要是對筆記本電腦以及它們的屬性進行抓取,然後再使用 Node.js 下載圖片並執行插入數據庫的操作。
首先對所有頁面的內容進行抓取:
var page =require('webpage').create(); var address='http://product.pconline.com.cn/server/'; var fs = require('fs'); var mypath = 'version/server/server.txt'; var count = 2; var pageSize=0; phantom.outputEncoding="gbk"; page.settings.userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"; function loadController(status){ loadComputerList(address); } function loadComputerList(url){ console.log('loading '+url); page.onLoadFinished = function loadListsucc(status){ console.log("loadlistSucc ["+url+"] =======================Status:"+status); }; page.open(url,function(status){ setTimeout(function(){ console.log(status); var content=''; content = page.evaluate(function(){ var cont=''; var listComputer = document.querySelectorAll('div.item-title>h3>a'); var listPrice =document.querySelectorAll('div.price'); for(var j=0;jvar computer = listComputer[j].innerText; &n