Lucene全文检索
POM.xml
<!-- Lucene full-text search -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>${lucene.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>${lucene.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-common</artifactId>
    <version>${lucene.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>${lucene.version}</version>
</dependency>
<!-- Chinese analyzer (word segmentation) -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>${lucene.version}</version>
</dependency>
LuceneUtil.java
package io.guangsoft.erp.util;
3 import org.apache.lucene.analysis.Analyzer; 4 import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; 5 import org.apache.lucene.document.Document; 6 import org.apache.lucene.index.*; 7 import org.apache.lucene.search.IndexSearcher; 8 import org.apache.lucene.search.Query; 9 import org.apache.lucene.search.ScoreDoc; 10 import org.apache.lucene.search.TopDocs; 11 import org.apache.lucene.search.highlight.*; 12 import org.apache.lucene.store.Directory; 13 import org.apache.lucene.store.FSDirectory; 14
15 import java.nio.file.Paths; 16 import java.util.List; 17
18 public class LuceneUtil { 19 //索引目录位置
20 private static final String INDEX_DIR = "/index"; 21 //索引文件存放目录对象
22 private static Directory directory; 23 //分词器对象
24 private static Analyzer analyzer; 25 //索引写对象,线程安全
26 private static IndexWriter indexWriter; 27 //索引读对象,线程安全
28 private static IndexReader indexReader; 29 //索引搜索对象,线程安全
30 private static IndexSearcher indexSearcher; 31
32 static { 33 try { 34 directory = FSDirectory.open(Paths.get(INDEX_DIR)); 35 //系统关闭前关闭索引库的流
36 Runtime.getRuntime().addShutdownHook(new Thread() { 37 @Override 38 public void run() { 39 try { 40 if(indexWriter != null) { 41 indexWriter.close(); 42 } 43 if(indexReader != null) { 44 indexReader.close(); 45 } 46 if(directory != null) { 47 directory.close(); 48 } 49 } catch (Exception e) { 50 e.printStackTrace(); 51 } 52 } 53 }); 54 } catch (Exception e) { 55 e.printStackTrace(); 56 } 57 } 58
59 //获取分词器
60 public static Analyzer getAnalyzer() { 61 if(analyzer == null) { 62 analyzer = new SmartChineseAnalyzer(); 63 } 64 return analyzer; 65 } 66
67 //获取索引Writer
68 public static IndexWriter getIndexWriter() { 69 if(indexWriter == null || !indexWriter.isOpen()) { 70 try { 71 IndexWriterConfig indexWriterConfig = new IndexWriterConfig(getAnalyzer()); 72 indexWriter = new IndexWriter(directory, indexWriterConfig); 73 } catch (Exception e) { 74 e.printStackTrace(); 75 } 76 } 77 return indexWriter; 78 } 79
80 //获取索引Reader
81 public static IndexReader getIndexReader() { 82 try { 83 if(indexReader == null) { 84 indexReader = DirectoryReader.open(directory); 85 } else { 86 //对比索引库是否更新,更新则使用更新后的Reader
87 IndexReader newIndexReader = DirectoryReader.openIfChanged((DirectoryReader) indexReader); 88 if(newIndexReader != null) { 89 indexReader.close(); 90 indexReader = newIndexReader; 91 } 92 } 93 } catch (Exception e) { 94 e.printStackTrace(); 95 } 96 return indexReader; 97 } 98
99 //获取索引Searcher
100 public static IndexSearcher getIndexSearcher() { 101 indexSearcher = new IndexSearcher(getIndexReader()); 102 return indexSearcher; 103 } 104
105 //打印索引文档(表)
106 public static void printDocument(Document document) { 107 System.out.println(document); 108 List<IndexableField> fieldList = document.getFields(); 109 for(IndexableField field : fieldList) { 110 System.out.println(field.name() + " : " + field.stringValue()); 111 } 112 } 113
114 //打印命中文档
115 public static void printScoreDoc(ScoreDoc scoreDoc) { 116 int docId = scoreDoc.doc; 117 System.out.println("文档编号:" + docId); 118 System.out.println("文档得分:" + scoreDoc.score); 119 try { 120 Document document = indexSearcher.doc(docId); 121 printDocument(document); 122 } catch (Exception e) { 123 e.printStackTrace(); 124 } 125 } 126
127 //打印带得分的命中文档
128 public static void printTopDocs(TopDocs topDocs) { 129 int totalHits = topDocs.totalHits; 130 System.out.println("命中文档总条数:" + totalHits); 131 System.out.println("命中文档最大分数:" + topDocs.getMaxScore()); 132 ScoreDoc[] scoreDocs = topDocs.scoreDocs; 133 for(ScoreDoc scoreDoc : scoreDocs) { 134 printScoreDoc(scoreDoc); 135 } 136 } 137
138 //高亮打印命中文档
139 public static void printTopDocsHighlight(TopDocs topDocs, Query query) { 140 // 格式化器:参数1:前置标签,参数2:后置标签
141 Formatter formatter = new SimpleHTMLFormatter("<em>", "</em>"); 142 //打分对象,参数:query里面的条件,条件里面有搜索关键词
143 Scorer scorer = new QueryScorer(query); 144 //高亮工具:参数1.需要高亮什么颜色, 参数2.将哪些关键词进行高亮
145 Highlighter hightlighter = new Highlighter(formatter, scorer); 146 try { 147 for(ScoreDoc scoreDoc : topDocs.scoreDocs) { 148 Document document = getIndexSearcher().doc(scoreDoc.doc); 149 List<IndexableField> fieldList = document.getFields(); 150 for(IndexableField field : fieldList) { 151 String highlightValue = hightlighter.getBestFragment(getAnalyzer(), field.name(), field.stringValue()); 152 if(highlightValue == null) { 153 highlightValue = field.stringValue(); 154 } 155 System.out.println(field.name() + " : " + highlightValue); 156 } 157 } 158 } catch (Exception e) { 159 e.printStackTrace(); 160 } 161 } 162
163 }
LuceneDAO.java
1 package io.guangsoft.erp.dao; 2
3 import org.apache.lucene.search.TopDocs; 4
5 import java.util.Map; 6
7 public interface LuceneDAO { 8
9 public void insertDoc(Map<String, String> docMap) throws Exception; 10
11 public void deleteDoc(String id) throws Exception; 12
13 public void updateDoc(Map<String, String> docMap) throws Exception; 14
15 public void insertOrUpdateDoc(Map<String, String> docMap) throws Exception; 16
17 //严格匹配整个字段,可传多个字段
18 public TopDocs searchDocsByTerm(Map<String, String> termMap) throws Exception; 19
20 //匹配分词后的字段,可传多个字段
21 public TopDocs searchDocsByParser(Map<String, String> parserMap) throws Exception; 22
23 }
LuceneDAOImpl.java
1 package io.guangsoft.erp.dao.impl; 2
3 import io.guangsoft.erp.dao.LuceneDAO; 4 import io.guangsoft.erp.util.LuceneUtil; 5 import org.apache.lucene.document.Document; 6 import org.apache.lucene.document.Field; 7 import org.apache.lucene.document.FieldType; 8 import org.apache.lucene.index.IndexOptions; 9 import org.apache.lucene.index.IndexWriter; 10 import org.apache.lucene.index.Term; 11 import org.apache.lucene.queryparser.classic.QueryParser; 12 import org.apache.lucene.queryparser.classic.QueryParserBase; 13 import org.apache.lucene.search.*; 14
15 import java.util.Map; 16
17 public class LuceneDAOImpl implements LuceneDAO { 18
19 @Override 20 public void insertDoc(Map<String, String> docMap) throws Exception { 21 FieldType fieldType = new FieldType(); 22 //是否存储记录
23 fieldType.setStored(true); 24 //文档型索引,只索引文档,不支持打分和位置检索
25 fieldType.setIndexOptions(IndexOptions.DOCS); 26 //是否要忽略field的加权基准值,如果为true可以节省内存消耗 27 //但在打分质量方面会有更高的消耗,也不能使用index-time进行加权操作。
28 fieldType.setOmitNorms(true); 29 //是否使用分析器将域值分解成独立的语汇单元流,是否分词
30 fieldType.setTokenized(true); 31 //lucene索引库的一条记录
32 Document document = new Document(); 33 for(Map.Entry<String, String> entry : docMap.entrySet()) { 34 Field field = new Field(entry.getKey(), entry.getValue(), fieldType); 35 document.add(field); 36 } 37 //保存到索引库
38 IndexWriter indexWriter = LuceneUtil.getIndexWriter(); 39 indexWriter.addDocument(document); 40 indexWriter.close(); 41 } 42
43 @Override 44 public void deleteDoc(String id) throws Exception { 45 IndexWriter indexWriter = LuceneUtil.getIndexWriter(); 46 Term term = new Term("id", id); 47 indexWriter.deleteDocuments(term); 48 indexWriter.forceMergeDeletes(); 49 indexWriter.commit(); 50 indexWriter.close(); 51 } 52
53 @Override 54 public void updateDoc(Map<String, String> docMap) throws Exception { 55 FieldType fieldType = new FieldType(); 56 fieldType.setStored(true); 57 fieldType.setIndexOptions(IndexOptions.DOCS); 58 fieldType.setOmitNorms(true); 59 fieldType.setTokenized(true); 60 Document document = new Document(); 61 for(Map.Entry<String, String> entry : docMap.entrySet()) { 62 Field field = new Field(entry.getKey(), entry.getValue(), fieldType); 63 document.add(field); 64 } 65 Term term = new Term("id", docMap.get("id")); 66 IndexWriter indexWriter = LuceneUtil.getIndexWriter(); 67 indexWriter.updateDocument(term, document); 68 indexWriter.close(); 69 } 70
71 @Override 72 public void insertOrUpdateDoc(Map<String, String> docMap) throws Exception { 73 Term term = new Term("id", docMap.get("id")); 74 TermQuery termQuery = new TermQuery(term); 75 TopDocs topDocs = LuceneUtil.getIndexSearcher().search(termQuery, 1); 76 if(topDocs.totalHits == 0) { 77 insertDoc(docMap); 78 } else { 79 updateDoc(docMap); 80 } 81 } 82
83 @Override 84 public TopDocs searchDocsByTerm(Map<String, String> termMap) throws Exception { 85 BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder(); 86 for(Map.Entry<String, String> termEntry : termMap.entrySet()) { 87 Term term = new Term(termEntry.getKey(), termEntry.getValue()); 88 TermQuery termQuery = new TermQuery(term); 89 booleanQueryBuilder.add(termQuery, BooleanClause.Occur.MUST); 90 } 91 BooleanQuery booleanQuery = booleanQueryBuilder.build(); 92 //是否开启特定字段排序
93 boolean orderFlag = false; 94 TopDocs topDocs = null; 95 if(orderFlag) { 96 Sort sort = new Sort(new SortField[]{new SortField("createTime", SortField.Type.LONG, true)}); 97 topDocs = LuceneUtil.getIndexSearcher().search(booleanQuery, 99999999, sort); 98 } else { 99 topDocs = LuceneUtil.getIndexSearcher().search(booleanQuery, 99999999); 100 } 101 return topDocs; 102 } 103
104 @Override 105 public TopDocs searchDocsByParser(Map<String, String> parserMap) throws Exception { 106 BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder(); 107 for(Map.Entry<String, String> parserEntry : parserMap.entrySet()) { 108 QueryParser queryParser = new QueryParser(parserEntry.getKey(), LuceneUtil.getAnalyzer()); 109 queryParser.setDefaultOperator(QueryParserBase.AND_OPERATOR); 110 Query query = queryParser.parse(parserEntry.getValue()); 111 booleanQueryBuilder.add(query, BooleanClause.Occur.MUST); 112 } 113 BooleanQuery booleanQuery = booleanQueryBuilder.build(); 114 //是否开启特定字段排序
115 boolean orderFlag = false; 116 TopDocs topDocs = null; 117 if(orderFlag) { 118 Sort sort = new Sort(new SortField[]{new SortField("createTime", SortField.Type.LONG, true)}); 119 topDocs = LuceneUtil.getIndexSearcher().search(booleanQuery, 99999999, sort); 120 } else { 121 topDocs = LuceneUtil.getIndexSearcher().search(booleanQuery, 99999999); 122 } 123 return topDocs; 124 } 125
126 }
LuceneTest.java
1 package io.guangsoft.erp; 2
3 import com.alibaba.fastjson.JSONArray; 4 import com.alibaba.fastjson.JSONObject; 5 import io.guangsoft.erp.dao.LuceneDAO; 6 import io.guangsoft.erp.dao.impl.LuceneDAOImpl; 7 import io.guangsoft.erp.util.LuceneUtil; 8 import org.apache.lucene.index.Term; 9 import org.apache.lucene.search.TermQuery; 10 import org.apache.lucene.search.TopDocs; 11 import org.junit.Test; 12
13 import java.util.HashMap; 14 import java.util.Map; 15 import java.util.stream.Collectors; 16
17 public class LuceneTest { 18
19 LuceneDAO luceneDAO = new LuceneDAOImpl(); 20
21 @Test 22 public void testInsertDoc() throws Exception { 23 JSONArray jsonArray = JSONArray.parseArray( 24 "[{id:'1',name:'李白',desc:'朝辞白帝彩云间'}, " +
25 "{id:'2',name:'杜甫',desc:'润物细无声'}, " +
26 "{id:'3',name:'苏轼',desc:'大江东去浪淘尽'}]"); 27 for(int i = 0; i < jsonArray.size(); i++) { 28 JSONObject jsonObject = jsonArray.getJSONObject(i); 29 Map<String, String> docMap = jsonObject.entrySet().stream().collect(Collectors.toMap( 30 Map.Entry :: getKey, entry -> entry.getValue().toString() 31 )); 32 luceneDAO.insertDoc(docMap); 33 } 34 } 35
36 @Test 37 public void testSearchDocsByTerm() throws Exception { 38 Map<String, String> docMap = new HashMap<String, String>(); 39 docMap.put("name", "李白"); 40 TopDocs topDocs = luceneDAO.searchDocsByTerm(docMap); 41 LuceneUtil.printTopDocs(topDocs); 42 } 43
44 @Test 45 public void testSearchDocsByParser() throws Exception { 46 Map<String, String> docMap = new HashMap<String, String>(); 47 docMap.put("name", "李白"); 48 TopDocs topDocs = luceneDAO.searchDocsByParser(docMap); 49 LuceneUtil.printTopDocsHighlight(topDocs, new TermQuery(new Term("name", "李白"))); 50 } 51
52 @Test 53 public void testUpdateDoc() throws Exception { 54 Map<String, String> docMap = new HashMap<String, String>(); 55 docMap.put("name", "李白"); 56 LuceneUtil.printTopDocs(luceneDAO.searchDocsByTerm(docMap)); 57 docMap.put("id", "1"); 58 docMap.put("desc", "人生得意须尽欢"); 59 luceneDAO.updateDoc(docMap); 60 docMap.remove("id"); 61 docMap.remove("desc"); 62 LuceneUtil.printTopDocs(luceneDAO.searchDocsByTerm(docMap)); 63 } 64
65 @Test 66 public void testDeleteDoc() throws Exception{ 67 Map<String, String> docMap = new HashMap<String, String>(); 68 docMap.put("id", "1"); 69 LuceneUtil.printTopDocs(luceneDAO.searchDocsByTerm(docMap)); 70 luceneDAO.deleteDoc("1"); 71 LuceneUtil.printTopDocs(luceneDAO.searchDocsByTerm(docMap)); 72 } 73 }

更多精彩