monkeeShark/src/tools/analysis/core.ts

52 lines
1.1 KiB
TypeScript
Raw Normal View History

2017-09-06 14:58:28 +00:00
const bayes = require('./naive-bayes.js');
const MeCab = require('mecab-async');
import Post from '../../api/models/post';
import config from '../../conf';
/**
 * Learns from posts and predicts the category of a given text.
 *
 * A naive-Bayes classifier (from `./naive-bayes.js`) is trained on every post
 * whose `is_category_verified` flag is true, using a MeCab-based tokenizer
 * that keeps only noun tokens. Call `init()` once before `predict()`.
 */
export default class Categorizer {
  /** Trained classifier; stays unset until `init()` has completed successfully. */
  private classifier: any;

  /** MeCab instance used to split text into tokens. */
  private mecab: any;

  /**
   * Tokenizer handed to the classifier.
   *
   * Declared as an arrow-function property so `this` stays bound to the
   * instance even when the classifier invokes it as a bare function.
   *
   * @param text Text to tokenize.
   * @returns The first field of every parsed token whose second field is '名詞' (noun).
   */
  private readonly tokenizer = (text: string): string[] => {
    // NOTE(review): assumes each parsed token is an array shaped like
    // [surface, partOfSpeech, ...] — confirm against mecab-async's parseSync.
    return this.mecab.parseSync(text)
      // Restrict to nouns only.
      .filter((token: string[]) => token[1] === '名詞')
      // Extract the token text.
      .map((token: string[]) => token[0]);
  };

  /**
   * Creates the MeCab instance and, when `config.categorizer.mecab_command`
   * is set, uses it as the MeCab command.
   */
  constructor() {
    this.mecab = new MeCab();

    if (config.categorizer.mecab_command) {
      this.mecab.command = config.categorizer.mecab_command;
    }
  }

  /**
   * Builds the classifier and trains it on all category-verified posts.
   *
   * The classifier is only published on the instance after training has
   * finished, so a failure while fetching posts never leaves a half-built
   * model behind for `predict()` to use.
   */
  public async init(): Promise<void> {
    const classifier = bayes({
      tokenizer: this.tokenizer
    });

    // Fetch the training data.
    const verifiedPosts = await Post.find({
      is_category_verified: true
    });

    // Train.
    verifiedPosts.forEach(post => {
      classifier.learn(post.text, post.category);
    });

    this.classifier = classifier;
  }

  /**
   * Predicts the category of the given text.
   *
   * @param text Text to categorize.
   * @returns Whatever the classifier's `categorize` yields for `text`.
   * @throws Error (as a rejected promise) when `init()` has not completed yet.
   */
  public async predict(text: string) {
    if (this.classifier == null) {
      throw new Error('Categorizer is not initialized: call init() before predict()');
    }

    return this.classifier.categorize(text);
  }
}