sys
root@hostname:/home/shimizu/nodejs# cat sys_sample.js
// Demonstrates three helpers of the legacy "sys" module: print, log and inspect.
var sys = require('sys');

// Sample record that is dumped with sys.inspect() below.
var profile = { name: "shimizu", age: 21 };

// Write the text as-is; the trailing newline is supplied explicitly.
sys.print("Print message\n");

// Log a message (the recorded output shows it prefixed with a timestamp).
sys.log('Log message.');

// Turn the object into its string representation, then print it on its own line.
var dump = sys.inspect(profile);
sys.print(dump + "\n");
root@hostname:/home/shimizu/nodejs# nodejs sys_sample.js
Print message
12 Jan 23:53:01 - Log message.
{ name: 'shimizu', age: 21 }
jsdom
JavaScriptによるDOM解析と操作をサーバーサイド(nodejs)で実施できる
sample1
root@hostname:/home/shimizu/nodejs# cat hello_dom.js
// Build a DOM document from an HTML string and read one element back out of it.
var jsdom = require("jsdom").jsdom;

var markup = "<html><head></head><body><div id='greeting'>hello world</div></body></html>";
var document = jsdom(markup);
var window = document.parentWindow;

// Look the <div> up by id through the window's document and print its contents.
var greeting = window.document.getElementById("greeting");
console.log(greeting.innerHTML);
root@hostname:/home/shimizu/nodejs# nodejs hello_dom.js
hello world
sample2
root@hostname:/home/shimizu/nodejs/crawler1# cat html.js
var jsdom = require("jsdom").jsdom,
document = jsdom("<html><head></head><body>hello world</body></html>"),
window = document.createWindow();
// NOTE: in the run recorded below, the createWindow() call above throws a
// TypeError (the HTMLDocument object has no such method), so none of the
// following statements execute. The "output" comments describe the intended
// results, not values observed in that run.
console.log(window.document.innerHTML);
// output: '<html><head></head><body>hello world</body></html>'
console.log(window.innerWidth)
// output: 1024
console.log(typeof window.document.getElementsByClassName);
// outputs: function
root@hostname:/home/shimizu/nodejs/crawler1# nodejs html.js
/home/shimizu/nodejs/crawler1/html.js:3
window = document.createWindow();
^
TypeError: Object [object HTMLDocument] has no method 'createWindow'
at Object.<anonymous> (/home/shimizu/nodejs/crawler1/html.js:3:25)
at Module._compile (module.js:456:26)
at Object.Module._extensions..js (module.js:474:10)
at Module.load (module.js:356:32)
at Function.Module._load (module.js:312:12)
at Function.Module.runMain (module.js:497:10)
at startup (node.js:119:16)
at node.js:906:3
root@hostname:/home/shimizu/nodejs/crawler1# cat html.js
// Parse an HTML string with jsdom and inspect the resulting window object.
var jsdom = require("jsdom").jsdom;
var document = jsdom("<html><head></head><body>hello world</body></html>");
// The window is taken from parentWindow here (this replaces the createWindow() call).
var window = document.parentWindow;

var doc = window.document;

// Recorded run prints: undefined
console.log(doc.innerHTML);
// Recorded run prints: 1024
console.log(window.innerWidth);
// Recorded run prints: function
console.log(typeof doc.getElementsByClassName);
root@hostname:/home/shimizu/nodejs/crawler1# nodejs html.js
undefined
1024
function
crawlerの作成
タイトルを取得しDBにインサートする
root@hostname:/home/shimizu/nodejs/crawler2# npm install request mysql sequelize cheerio
root@hostname:/home/shimizu/nodejs/crawler2# cat crawler.js
// Initialize instance
// Crawler sample: fetch a single page, extract its <title>, and store the
// URL/title pair through the Sequelize `sites` model.

// Load the ORM
var sequelize = require('sequelize');
// Connect to database
var connection = new sequelize('DBName', 'user', 'password');
// Define the `sites` model with its two columns
var sites = connection.define('sites', {
    url: sequelize.TEXT,
    title: sequelize.STRING
});
// Load the HTTP client and the HTML parser
var request = require("request");
var cheerio = require("cheerio");
// Page to crawl
var requestUrl = "http://www.google.co.jp";

// Send the HTTP request; the callback receives (error, response, body).
request({url: requestUrl}, function(error, response, body) {
    // Success: no error was reported and the response status is 200
    if (!error && response.statusCode === 200) {
        // Declared with `var` so that `$` stays local to this callback
        // instead of leaking out as an implicit global.
        var $ = cheerio.load(body);
        // URL as reported by the request object, and the text of <title>
        var url = response.request.href;
        var title = $("title").text();
        console.log(url);
        console.log(title);
        // Build a new model instance and set its fields
        var site = sites.build();
        site.url = url;
        site.title = title;
        // Save to database and report the outcome
        site.save()
            .success(function() {
                console.log('Succeed');
            })
            .error(function(saveError) {
                // Named differently from the outer `error` to avoid shadowing it
                console.log(saveError);
            });
    }
    // Failure: print whichever diagnostic fields are available
    else {
        console.log("--------------------------------------------------");
        if (error && "code" in error) {
            console.log("Error Code:" + error.code);
        }
        if (error && "errno" in error) {
            console.log("Error No:" + error.errno);
        }
        if (error && "syscall" in error) {
            console.log("Error Syscall:" + error.syscall);
        }
        // A response may exist without an error (e.g. a non-200 status)
        if (response && "statusCode" in response) {
            console.log("Status Code:" + response.statusCode);
        }
    }
});
MySQL内を確認すると、以下のようにレコードが登録されている(3件中1件目のみ抜粋)
*************************** 1. row ***************************
id: 1
url: http://www.google.co.jp/
title: Google
createdAt: 2015-01-12 16:04:14
updatedAt: 2015-01-12 16:04:14
3 rows in set (0.00 sec)
参考URL
node.jsで遊ぶ: 標準モジュール編
http://blog.summerwind.jp/archives/1436
node.jsを使ってjQueryチックにWebサイトをクローリングする方法
http://liginc.co.jp/programmer/archives/4848
バージョンごとのdocs
http://nodejs.org/docs/
http://sakuratan.biz/archives/3393
https://github.com/tmpvar/jsdom
http://blog.summerwind.jp/archives/1436