LuceneでMoreLikeThisがうまくいかない(いった)
C:\Lucene\files\にTwitterのユーザー名.txtがあり、TwitterのPOSTが1行に1つ
C:\Lucene\indexにindexを作成済み
1つのユーザー名.txtから類似したユーザー名.txtを探す
import java.io.File; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import org.apache.jackrabbit.core.query.lucene.MoreLikeThis; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.ja.JapaneseAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; class SearchJFilesFromDoc { public static void main(String[] args) throws CorruptIndexException, IOException, ParseException { System.setProperty("sen.home", "D:\\Software\\java\\sen"); String index = "C:\\Lucene\\index"; Directory dir = new RAMDirectory(new File(index)); IndexReader reader = IndexReader.open(dir); IndexSearcher searcher = new IndexSearcher(dir); MoreLikeThis mlt = new MoreLikeThis(reader); Analyzer analyzer = new JapaneseAnalyzer(); mlt.setAnalyzer(analyzer); String user = "snkken"; String userTweet = "C:\\Lucene\\files\\" + user + ".txt"; Reader target = new StringReader(userTweet); Query query = mlt.like(target); System.out.println("Searching for: " + query.toString("contents")); TopDocs docs = searcher.search(query, 10); for (ScoreDoc scoreDoc : docs.scoreDocs) { int docId = scoreDoc.doc; Document doc = searcher.doc(docId); System.out.println("path" + doc.get("path")); } searcher.close(); } }
実行結果
log4j:WARN No appenders could be found for logger (net.java.sen.Dictionary). log4j:WARN Please initialize the log4j system properly. Searching for:
研究室の仲間の協力を得て無事動作!
Query query = mlt.like(new BufferedReader(new InputStreamReader(new FileInputStream(userTweet), "SJIS"))); // Reader target = new StringReader(userTweet); // FileInputStream fis = new FileInputStream(target); // Query query = mlt.like(target);
log4j:WARN No appenders could be found for logger (net.java.sen.Dictionary). log4j:WARN Please initialize the log4j system properly. Searching for: あめ kt スクレ の ロイド 帽子 じょ クーラ 赤い w だ 人 カ ええ は 数 ー に た トランスフォーマ て 明日 さん も ない pathC:\Lucene\files\snkken.txt その他結果