SpringBoot+Mybatis+Elasticsearch 实现模糊分页查询并标记关键字
SpringBoot+Mybatis+Elasticsearch 实现模糊分页查询并标记关键字
SpringBoot 整合 Elasticsearch 实现模糊分页查询并标记关键字
一、概述 & 介绍
Elasticsearch 是基于 Lucene 技术的搜索引擎(服务器),先对数据建立索引,再基于索引进行检索查询。
与数据库查询的比较:
(1)相当于 sql 查询的 like 模糊查询,但 Elasticsearch 支持分词模糊查询,比如字符串 “abcdef 你 好 abdcd” ,通过数据库查询 [select * from user where user_name like '% 你 好 %'; ] 只能查询仅限于以 “你 好” 为整体得到相关的结果【abcdef 你 好 abdcd】或【abcdef 你 好】或【你 好 abdcd】等。而 Elasticsearch 搜索结果将 “你 好” 进行拆分查询,结果可以得到【abcdef 你 好 abdcd】【abcdef 你】、【好 abdcd】、【 好 abd】,【ef 你】等,可见查询效果更灵活范围更广。
形容词:elastic => 有弹性的,有弹力的,灵活的;名词: elastic => 橡皮圈 =====> Elasticsearch 弹性灵活搜索
Elasticsearch 使用场景:网站全局搜索、电商网站商品推荐、文章内容检索、文本分析 。。。。。。。。。等等等等。
官网:https://www.elastic.co/cn/
下载地址:https://www.elastic.co/cn/downloads/elasticsearch
二、环境描述:
技术架构:
后端:springboot、mybatis、elasticsearch
前端:vuejs
三、环境搭建:
搭建环境主要需要安装 elasticsearch(搜索引擎)和 elasticsearch-head(帮助界面),具体安装方式可以参考
windows 版本安装:https://blog.csdn.net/chen_2890/article/details/83757022
linux 版本安装:https://blog.csdn.net/qq_32502511/article/details/86140486
启动系统变量限制问题参考 https://www.cnblogs.com/zuikeol/p/10930685.html
四、具体实现
本次实现为网站文章搜索,搜索内容根据标题、内容、文章描述进行搜索,实现分页搜索和列表功能。
实现步骤描述:1.java 爬虫收集文章;2、定时任务将文章从数据表中加载上传到 elasticsearch 搜索引擎服务器;3. 分页模糊搜索并标记展示搜索到相关关键词。
【第一步】通过爬虫搜集网站博客文章插入到数据表中。
爬虫方式:java 定时任务,jsoup 爬虫 html 页面解析工具(需要了解被爬虫的网站页面 html 结构),直接上爬虫代码:
package com.etech.univweb.timertask;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.collections.CollectionUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import com.etech.univweb.common.utils.CommonUtils;
import com.etech.univweb.common.utils.DateUtils;
import com.etech.univweb.mgsite.dao.BasArticleMapper;
import com.etech.univweb.mgsite.entity.BasArticle;
/**
 * Web article crawler task: periodically scrapes a blog list page with jsoup,
 * follows each article link, extracts title/author/content/cover image, and
 * persists new articles through {@link BasArticleMapper}.
 *
 * @author 肖仁枰(Alex Ping)
 */
@Component
@EnableScheduling // enable scheduled tasks
@EnableAsync      // enable async execution
public class WebArticleCrwalerTask {
	// BUG FIX: the logger was created with ElasticSearchImportDataTask.class;
	// use this class so log lines carry the correct source.
	private static final Logger logger = LoggerFactory.getLogger(WebArticleCrwalerTask.class);
	@Autowired
	BasArticleMapper basArticleMapper;
	// Current list-page index, advanced after each successful crawl.
	// NOTE(review): not thread-safe if @Async runs overlap — confirm executions are serialized.
	private int i = 1;

	@Async
	@Scheduled(cron = "0/5 * * * * ?") // every 5 seconds
	public void crawlArticles() {
		try {
			// Fetch the list page and parse it into a DOM document.
			// Jsoup.connect(...).get() throws on failure and never returns null,
			// so the old "doc != null" checks were dead code.
			String url = "https://www.jianshu.com/c/d0c5b34afc5f?order_by=added_at&page=" + i;
			Document doc = Jsoup.connect(url).get();
			// All article-title anchors on the list page.
			Elements htmlElements = doc.select("a[class=\"title\"]");
			for (Element elementItem : htmlElements) {
				// Resolve the absolute detail URL and load the article page.
				String detailUrl = elementItem.attr("abs:href");
				Document articleDoc = Jsoup.connect(detailUrl).get();
				logger.info("网站文章详情url===============================>" + detailUrl + "内容========>" + articleDoc.toString());
				// Article body section.
				String articleContent = articleDoc.select("section[class=\"ouvJEz\"]").html();
				// Title and author.
				String articleTitle = articleDoc.select("h1[class=\"_1RuRku\"]").html();
				String articleAuthor = articleDoc.select("span[class=\"_22gUMi\"]").text();
				// Pick the first image as cover picture, or fall back to a default.
				Elements imgElements = articleDoc.select("img[src]");
				String articlePicUrl = null;
				if (!CollectionUtils.isEmpty(imgElements)) {
					for (Element imgElementItem : imgElements) {
						if (imgElementItem.attr("src") != null) {
							articlePicUrl = imgElementItem.attr("src") + "?imageMogr2/auto-orient/strip|imageView2/2/w/1200/format/webp";
							break;
						}
					}
				} else {
					// default picture
					articlePicUrl = "http://view.jqueryfuns.com/2019/4/3/25c12c43a656b98f5c33788c0ff8100b/images/h2.jpg";
				}
				// NOTE(review): replacing "0" with "-" mangles any date containing the
				// digit zero; this looks like it was meant to replace "." or another
				// separator — confirm against the site's actual <time> format.
				String writeTime = articleDoc.select("time").text().replace("0", "-");
				Date createTime = DateUtils.formatElasticSearchDateStr(writeTime);
				// The article summary defaults to its title.
				String articleDescription = articleTitle;
				this.saveArticle(articleTitle, articleAuthor, articleContent, articlePicUrl, articleDescription, createTime);
				logger.info("网站文章===============================>" + articleTitle + articleAuthor + articleContent + articlePicUrl + articleDescription);
			}
			i += 1;
		} catch (Exception e) {
			// Preserve the stack trace in the log instead of printStackTrace().
			logger.error("服务器内部错误:爬虫异常" + e.getMessage(), e);
		}
	}

	/**
	 * Persist one crawled article unless an article with the same title already
	 * exists (title is used as the de-duplication key).
	 *
	 * @param articleTitle       article title (dedup key)
	 * @param articleAuthor      author display name
	 * @param articleContent     article body HTML
	 * @param articlePicUrl      cover picture URL
	 * @param articleDescription short description shown in lists
	 * @param createTime         publish time; current time is used when null
	 * @return always "success" (errors are logged, not propagated)
	 */
	public String saveArticle(String articleTitle, String articleAuthor, String articleContent, String articlePicUrl, String articleDescription, Date createTime) {
		try {
			Map<String, Object> paramMap = new HashMap<String, Object>();
			paramMap.put("articleTitle", articleTitle);
			List<BasArticle> articleList = basArticleMapper.findArticleByTitle(paramMap);
			if (CollectionUtils.isEmpty(articleList)) {
				// No article with this title yet — insert a new row.
				BasArticle article = new BasArticle();
				article.setArticleId(CommonUtils.generatePrimaryKeyId());
				article.setArticleTitle(articleTitle);
				article.setArticleAuthor(articleAuthor);
				article.setArticleContent(articleContent);
				article.setArticleType("normal");
				article.setArticlePicUrl(articlePicUrl);
				article.setArticleDescription(articleDescription);
				article.setReadCount(0L);
				article.setStatus(1);
				if (createTime == null) {
					createTime = new Date();
				}
				article.setCreateTime(createTime);
				basArticleMapper.insert(article);
			}
		} catch (Exception e) {
			// Preserve the stack trace in the log instead of printStackTrace().
			logger.error("服务器内部错误:文章数据导入失败!", e);
		}
		return "success";
	}
}
文章数据表结构:
CREATE TABLE `bas_article` (
  `article_id`          varchar(64)  NOT NULL,
  `article_pic_url`     varchar(200) DEFAULT NULL COMMENT '文章开头图片',
  `article_title`       varchar(200) DEFAULT NULL COMMENT '标题',
  `article_content`     longtext COMMENT '内容',
  `article_type`        varchar(10)  DEFAULT NULL COMMENT '类型(normal普通,recomand推荐,hot热门)',
  `article_author`      varchar(100) DEFAULT NULL COMMENT '作者',
  `read_count`          bigint(5)    DEFAULT NULL COMMENT '阅读数量',
  `status`              int(5)       DEFAULT NULL COMMENT '状态(1启用/发布,0禁用/未发布)',
  `create_id`           varchar(64)  DEFAULT NULL COMMENT '创建人id',
  -- BUG FIX: create_time previously had ON UPDATE CURRENT_TIMESTAMP, which would
  -- silently rewrite the creation time on every row update; only update_time
  -- should auto-update.
  `create_time`         datetime DEFAULT NULL COMMENT '创建时间',
  `update_time`         datetime DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  `article_description` varchar(200) DEFAULT NULL COMMENT '文章简介',
  PRIMARY KEY (`article_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
收集结果
【第二步】将数据表中的内容加载到 Elasticsearch 搜索服务器中
定时任务查询内容:
package com.etech.univweb.timertask;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import org.apache.shiro.util.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.stereotype.Component;
import com.etech.univweb.mgsite.dao.BasArticleMapper;
import com.etech.univweb.mgsite.entity.BasArticle;
import com.etech.univweb.system.elasticsearch.repository.EsArticleRepository;
import com.etech.univweb.system.elasticsearch.vo.ESArticleVo;
/**
-
定时任务导入数据到elasticsearch搜索引擎中
-
@author 肖仁枰(Alex Ping)
*/
@Component
@EnableScheduling //开启定时任务
@EnableAsync //开启多线程
public class ElasticSearchImportDataTask {private static Logger logger = LoggerFactory.getLogger(ElasticSearchImportDataTask.class);
@Resource
private EsArticleRepository esArticleRepository;@Autowired
private BasArticleMapper basArticleMapper;//@Async
//@Scheduled(cron = “0/5 * * * * ?”) // 5秒钟一次
public void importTravel() throws InterruptedException {
logger.info(“执行定时任务…”);
try {
if (true) {
logger.info(“正在导入文章数据…”);
Map<String, Object> paramMap = new HashMap<String, Object>();
List articleList = basArticleMapper.findArticleList(paramMap);
List eSArticleVoList = new ArrayList();
if(!CollectionUtils.isEmpty(articleList)) {
for(BasArticle articleItem : articleList) {
ESArticleVo eSArticleVo = new ESArticleVo();
//BeanUtils.copyProperties(articleItem, eSArticleVo);
eSArticleVo.setCreateTime(articleItem.getCreateTime());
eSArticleVo.setArticleTitle(articleItem.getArticleTitle());
eSArticleVo.setArticleAuthor(articleItem.getArticleAuthor());
//eSArticleVo.setArticleContent(articleItem.getArticleContent());
eSArticleVo.setName(articleItem.getArticleAuthor());
eSArticleVo.setDescription(articleItem.getArticleDescription());
eSArticleVo.setType(articleItem.getArticleType());
eSArticleVo.setContent(articleItem.getArticleContent());
eSArticleVo.setBusId(articleItem.getArticleId());
eSArticleVo.setTitle(articleItem.getArticleTitle());
esArticleRepository.save(eSArticleVo);
}
Iterable data = esArticleRepository.saveAll(eSArticleVoList);
if (data != null) {
logger.info(“处理完成:文章数据导入完毕!”);
}
}
}
} catch (Exception e) {
logger.debug(“服务器内部错误:文章数据导入失败!”);
e.printStackTrace();
}
}
}
/** -
Elasticsearch 基础查询实体
-
@author XiaoRenPing
/
public class ElasticSearchEntity implements Serializable {
private static final long serialVersionUID = 5695568297523302402L;
/*
* ID
/
@Id
@Field(type = FieldType.Keyword, store=true)
private String busId;
/*
* 类型
/
@Field(type = FieldType.Keyword, store=true)
private String type;
/*
* 名称
/
@Field(type = FieldType.Keyword, store=true)
private String name;
/*
* 状态
/
@Field(type = FieldType.Keyword, store=true)
private String status;
/*
* 标题
*/
@Field(type = FieldType.Text, analyzer = “ik_max_word”, store=true)
private String title;
/**
* 内容
*/
@Field(type = FieldType.Text, analyzer = "ik_max_word", store=true)
private String content;
/**
* 描述
*/
@Field(type = FieldType.Text, analyzer = "ik_max_word", store=true)
private String description;
/**
* 创建时间
*/
@Field(type = FieldType.Keyword, store=true)
private Date createTime;
/**
* 更新时间
*/
@Field(type = FieldType.Keyword, store=true)
private Date updateTime;
/**
- elasticsearch 文章搜索实体
- @author XiaoRenPing
*/
@Document(indexName = “es_article_index”, type=“article”)
public class ESArticleVo extends ElasticSearchEntity implements Serializable {
@Field(type = FieldType.Keyword, store=true)
private String articleId;
@Field(type = FieldType.Keyword, store=true)
private String articleTitle;
@Field(type = FieldType.Keyword, store=true)
private String articleContent;
@Field(type = FieldType.Keyword, store=true)
private String articleTopic;
@Field(type = FieldType.Keyword, store=true)
private String articlePicUrl;
@Field(type = FieldType.Keyword, store=true)
private String articleAuthor;
/**
 * Elasticsearch article repository with derived query methods.
 *
 * @author XiaoRenPing
 */
public interface EsArticleRepository extends ElasticsearchCrudRepository<ESArticleVo, String> {

	/**
	 * Find articles whose title matches the given value.
	 * BUG FIX: Spring Data derived queries require exactly one parameter per
	 * property in the method name; the old signature took an extra ESArticleVo
	 * and an unused content argument, which breaks repository creation.
	 *
	 * @param articleTitle title to match
	 * @return matching articles (empty list when none)
	 */
	List<ESArticleVo> findByArticleTitle(String articleTitle);

	/**
	 * Find articles whose title OR content matches.
	 * BUG FIX: an Or-query needs one argument per property; pass the same
	 * keyword for both to search either field.
	 *
	 * @param articleTitle value matched against articleTitle
	 * @param content      value matched against content
	 * @return matching articles (empty list when none)
	 */
	List<ESArticleVo> findByArticleTitleOrContent(String articleTitle, String content);
}
数据收集完成,可以通过 elasticsearch-head 界面工具预览数据
【第三步】SpringBoot 整合 Elasticsearch 和实现关键词搜索
(1)pom.xml 导入 maven 依赖包
<!-- 网页解析工具 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<!-- springdata整合elasticsearch -->
<dependency>
<groupId>org.springframework.data</groupId>
<artifactId>spring-data-elasticsearch</artifactId>
</dependency>
(2)application.yml 配置
(3)MVC 实现
/**
- Elasticsearch 搜索api接口控制层
- @author XiaoRenPing
*/
@RestController
@RequestMapping(“/app/api/search”)
public class UnivwebElasticSearchApiController {
@Resource
IElasticSearchService elasticSearchService;
/**
* 分页搜索文章
* @param request
* @return
*/
@PostMapping("/article")
public PageResponse articleSearch(@RequestBody ESArticleSearchRequest request) {
if(request == null) {
return PageResponse.ok();
}
if(StringUtils.isBlank(request.getKeyword())) {
return PageResponse.ok();
}
return elasticSearchService.searchArticleData(request);
}
/**
- 业务层接口
- @author XiaoRenPing
*/
public interface IElasticSearchService {
PageResponse<ESArticleVo> searchArticleData(ESArticleSearchRequest request);
/**
- Elasticsearch 搜索业务实现层
- @author XiaoRenPing
*/
@Service
public class ElasticSearchServiceImpl implements IElasticSearchService {
private static Logger log = LoggerFactory.getLogger(ElasticSearchServiceImpl.class);
@Autowired
private ElasticsearchTemplate elasticsearchTemplate;
/**
* 分页搜索和标记显示
*/
@Override
public PageResponse<ESArticleVo> searchArticleData(ESArticleSearchRequest request) {
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
.should(QueryBuilders.matchQuery("articleTitle", request.getKeyword()))
.should(QueryBuilders.matchQuery("articleContent", request.getKeyword()))
.should(QueryBuilders.matchQuery("description", request.getKeyword()));
// 创建高亮查询
NativeSearchQueryBuilder nativeSearchQuery = new NativeSearchQueryBuilder();
nativeSearchQuery.withQuery(boolQueryBuilder);
nativeSearchQuery.withHighlightFields(new HighlightBuilder.Field("articleTitle"),
new HighlightBuilder.Field("articleContent"),
new HighlightBuilder.Field("description"));
nativeSearchQuery.withHighlightBuilder(new HighlightBuilder().preTags("<span style='color:red'>").postTags("</span>"));
nativeSearchQuery.withPageable(new PageRequest(request.getPageNum() - 1, request.getPageSize())); // 设置分页,页码要减1
// 分页对象
AggregatedPage<ESArticleVo> eSearchPage = elasticsearchTemplate.queryForPage(nativeSearchQuery.build(), ESArticleVo.class,
new SearchResultMapper() {
@Override
public <T> AggregatedPage<T> mapResults(SearchResponse response, Class<T> clazz, Pageable pageable) {
ArrayList<ESArticleVo> list = new ArrayList<ESArticleVo>();
SearchHits hits = response.getHits();
for (SearchHit searchHit : hits) {
if (hits.getHits().length <= 0) {
return null;
}
Map<String, Object> sourceAsMap = searchHit.getSourceAsMap();
String busId = (String) sourceAsMap.get("busId");
String articleTitle = (String) sourceAsMap.get("articleTitle");
String articleAuthor = (String) sourceAsMap.get("articleAuthor");
String description = (String) sourceAsMap.get("description");
String name = (String) sourceAsMap.get("name");
String content = (String) sourceAsMap.get("content");
String type = (String) sourceAsMap.get("type");
Date createTime = DateUtils.longToDateTime((Long) sourceAsMap.get("createTime"));
ESArticleVo seArticleVo = new ESArticleVo();
HighlightField highLightField = searchHit.getHighlightFields().get("articleTitle");
if (highLightField == null) {
seArticleVo.setArticleTitle(articleTitle);
} else {
seArticleVo.setArticleTitle(highLightField.fragments()[0].toString());
}
highLightField = searchHit.getHighlightFields().get("description");
if (highLightField == null) {
seArticleVo.setDescription(description);
} else {
seArticleVo.setContent(highLightField.fragments()[0].toString());
}
highLightField = searchHit.getHighlightFields().get("content");
if (highLightField == null) {
seArticleVo.setContent(content);
} else {
String hcontent = highLightField.fragments()[0].toString();
seArticleVo.setContent(hcontent);
}
highLightField = searchHit.getHighlightFields().get("busId");
if (highLightField == null) {
seArticleVo.setBusId(busId);
} else {
seArticleVo.setBusId(highLightField.fragments()[0].toString());
}
seArticleVo.setName(name);
seArticleVo.setCreateTime(createTime);
seArticleVo.setArticleAuthor(articleAuthor);
seArticleVo.setType(type);
list.add(seArticleVo);
}
AggregatedPage<T> pageResult = new AggregatedPageImpl<T>((List<T>) list, pageable, hits.getTotalHits());
return pageResult;
}
});
PageResponse response = new PageResponse();
response.setMessage("请求成功");
response.setStatus(200);
response.setList(eSearchPage.getContent());
response.setTotal(Long.valueOf(eSearchPage.getTotalElements()));
response.setPageNum(Long.valueOf(eSearchPage.getNumber()));
response.setPageSize(Long.valueOf(eSearchPage.getPageable().getPageSize()));
return response;
}
/**
 * Search request parameters for the article search API.
 *
 * @author XiaoRenPing
 */
public class ESArticleSearchRequest {

	/** Keyword to search for. */
	private String keyword;
	/** 1-based page number (converted to 0-based for Elasticsearch). */
	private int pageNum;
	/** Number of records per page. */
	private int pageSize;

	// BUG FIX: the controller and service call getKeyword()/getPageNum()/
	// getPageSize(), and @RequestBody binding needs setters — they were missing.
	public String getKeyword() {
		return keyword;
	}

	public void setKeyword(String keyword) {
		this.keyword = keyword;
	}

	public int getPageNum() {
		return pageNum;
	}

	public void setPageNum(int pageNum) {
		this.pageNum = pageNum;
	}

	public int getPageSize() {
		return pageSize;
	}

	public void setPageSize(int pageSize) {
		this.pageSize = pageSize;
	}
}
前端显示代码
【第四步】看界面
另外附上列表查询代码(无分页):
/**
* 列表查询,标记
* @param request 查询关键词
* @return DataResponse
*/
@Override
public DataResponse<List> searchArticleDataList(ESArticleSearchRequest request) {
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
.should(QueryBuilders.matchQuery(“articleTitle”, request.getKeyword()))
.should(QueryBuilders.matchQuery(“articleContent”, request.getKeyword()))
.should(QueryBuilders.matchQuery(“description”, request.getKeyword()));
// 创建高亮查询
NativeSearchQuery nativeSearchQuery = new NativeSearchQueryBuilder().withQuery(boolQueryBuilder)
.withHighlightFields(new HighlightBuilder.Field(“articleTitle”),
new HighlightBuilder.Field(“articleContent”), new HighlightBuilder.Field(“description”))
.withHighlightBuilder(new HighlightBuilder().preTags(“”).postTags(“”))
.build();
AggregatedPage<ESArticleVo> page = elasticsearchTemplate.queryForPage(nativeSearchQuery, ESArticleVo.class,
new SearchResultMapper() {
@Override
public <T> AggregatedPage<T> mapResults(SearchResponse response, Class<T> clazz,
Pageable pageable) {
ArrayList<ESArticleVo> list = new ArrayList<ESArticleVo>();
SearchHits hits = response.getHits();
for (SearchHit searchHit : hits) {
if (hits.getHits().length <= 0) {
return null;
}
Map<String, Object> sourceAsMap = searchHit.getSourceAsMap();
String busId = (String) sourceAsMap.get("busId");
String articleTitle = (String) sourceAsMap.get("articleTitle");
String articleAuthor = (String) sourceAsMap.get("articleAuthor");
String description = (String) sourceAsMap.get("description");
String name = (String) sourceAsMap.get("name");
String content = (String) sourceAsMap.get("content");
String type = (String) sourceAsMap.get("type");
Date createTime = DateUtils.longToDateTime((Long) sourceAsMap.get("createTime"));
// 高亮字段
ESArticleVo seArticleVo = new ESArticleVo();
HighlightField highLightField = searchHit.getHighlightFields().get("articleTitle");
if (highLightField == null) {
seArticleVo.setArticleTitle(articleTitle);
} else {
seArticleVo.setArticleTitle(highLightField.fragments()[0].toString());
}
highLightField = searchHit.getHighlightFields().get("description");
if (highLightField == null) {
seArticleVo.setDescription(description);
} else {
seArticleVo.setContent(highLightField.fragments()[0].toString());
}
highLightField = searchHit.getHighlightFields().get("content");
if (highLightField == null) {
seArticleVo.setContent(content);
} else {
String hcontent = highLightField.fragments()[0].toString();
seArticleVo.setContent(hcontent);
}
highLightField = searchHit.getHighlightFields().get("busId");
if (highLightField == null) {
seArticleVo.setBusId(busId);
} else {
seArticleVo.setBusId(highLightField.fragments()[0].toString());
}
seArticleVo.setName(name);
seArticleVo.setCreateTime(createTime);
seArticleVo.setArticleAuthor(articleAuthor);
seArticleVo.setType(type);
list.add(seArticleVo);
}
return new AggregatedPageImpl<T>((List<T>) list);
}
});
return DataResponse.ok(page.getContent());
}
五、总结
Elasticsearch 使用需要注意的几点问题:
(1)数据收集时去重整理完成后再保存到搜索服务器中,进行重复数据控制(本文中定时任务 demo 传入数据)。
(2)分页和列表的区别在于,分页需要重新计算页码(ES 没有对应设置每页记录数),执行查询时需要设置 nativeSearchQuery.withPageable (new PageRequest (request.getPageNum () - 1, request.getPageSize ())); 查询到结果后需要计算页码(具体说明在【第三步】的(3)业务实现代码)。
(3)ES 查询结果后,单独处理关键字,命中关键字部分通过 withHighlightBuilder ().preTags 方法设置命中文本标记。
nativeSearchQuery.withHighlightBuilder(new HighlightBuilder().preTags("<span style='color:red'>").postTags("</span>"));
(4)ES 搜索功能实现尽量封装成通用类型,可以实现不同类型内容的检索。
finally, demo 功能完成!
--------------------------------------------------------------------->>[Every problem has its solutions]
更多推荐
所有评论(0)