简单的es高亮实战
项目是根据狂神说大佬的一个简单项目实现的,分析京东页面的搜索页面,抓取商品名称,商品图片,和商品价格三个参数,存在自己的es中,然后通过页面进行展示,主要是为了使用es的高亮功能.前端的部分,我就直接贴过来.............
·
es高亮实战
本项目参考狂神说的一个简单项目实现:分析京东的搜索结果页面,抓取商品名称、商品图片和商品价格三个字段,存入自己的 es 中,然后通过页面进行展示,主要是为了练习 es 的高亮功能。前端的部分,我就直接贴过来。
爬取京东页面
引入jsoup包:
<!-- 解析网页(jsoup);爬取视频等内容可研究 tika -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
京东的根据关键字查询商品信息的老页面:
http://search.jd.com/search?keyword=
内容类:
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.io.Serializable;
/**
 * Serializable value holder for one scraped product entry.
 *
 * <p>Accessors and constructors are generated by the Lombok annotations below
 * ({@code @Data}, {@code @AllArgsConstructor}, {@code @NoArgsConstructor}).
 * The all-args constructor takes the fields in declaration order:
 * {@code name}, {@code img}, {@code price}.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content implements Serializable {
private static final long serialVersionUID = -8049497962627482693L;
// Product name; HtmlParseUtil.parseJD fills it with the text of the "p-name" element.
private String name;
// Product image reference; HtmlParseUtil.parseJD fills it from the "data-lazy-img" attribute.
private String img;
// Product price kept as the raw text of the "p-price" element (not parsed into a number).
private String price;
}
写个工具类进行抓取:
import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Scrapes product entries (name, image, price) from the JD keyword search page.
 *
 * <p>Network access is required: every call to {@link #parseJD(String)} downloads
 * the search page.
 */
public class HtmlParseUtil {

    /** Address of the JD keyword search page; the URL-encoded keyword is appended. */
    private static final String SEARCH_URL = "http://search.jd.com/search?keyword=";

    /** Timeout, in milliseconds, passed to jsoup when fetching the page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Manual smoke test: prints the entries scraped for a sample keyword. */
    public static void main(String[] args) throws IOException {
        System.out.println(parseJD("益达"));
    }

    /**
     * Fetches the JD search page for {@code keyword} and extracts one
     * {@link Content} per {@code li} element found under {@code #J_goodsList}.
     *
     * @param keyword search keyword; it is URL-encoded as UTF-8 before being sent
     * @return the scraped entries in page order; an empty list when the page has
     *         no {@code J_goodsList} element
     * @throws IOException if the page cannot be fetched or parsed
     */
    public static List<Content> parseJD(String keyword) throws IOException {
        // Encode the keyword so non-ASCII text and reserved characters (&, #, spaces)
        // cannot corrupt the query string. String.valueOf keeps the previous
        // behaviour of searching the literal "null" for a null keyword.
        String url = SEARCH_URL + URLEncoder.encode(String.valueOf(keyword), "UTF-8");

        // 1. Download and parse the page into a jsoup Document.
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Content> contents = new ArrayList<Content>();

        // 2. Locate the product list container by id.
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // The container is missing (different page served, layout change, ...);
            // report "nothing found" instead of failing with a NullPointerException.
            return contents;
        }

        // 3. Visit every li under the container and pull out image, name and price.
        Elements items = goodsList.getElementsByTag("li");
        for (Element item : items) {
            // Images are lazy-loaded, so the address is read from data-lazy-img
            // of the first img element rather than from src.
            String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String name = item.getElementsByClass("p-name").eq(0).text();
            String price = item.getElementsByClass("p-price").eq(0).text();
            contents.add(new Content(name, img, price));
        }
        return contents;
    }
}
执行结果:
进行数据保存,和数据查询
通过es将抓取的数据放入es中并高亮查询,对应的service类:
import com.alibaba.fastjson.JSON;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Stores scraped JD product entries in Elasticsearch and runs highlighted
 * searches against them.
 */
@Service
public class ContentService {

    /** Name of the index that holds the scraped product documents. */
    private static final String INDEX_NAME = "jd_goods";

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Scrapes the products for {@code keyword} and bulk-indexes them into
     * {@code jd_goods}.
     *
     * <p>Document ids are the 1-based position of each entry in the scraped list,
     * so a later call overwrites the documents written by an earlier one.
     *
     * @param keyword search keyword handed to {@code HtmlParseUtil.parseJD}
     * @return {@code true} when the bulk request completed without failures;
     *         {@code false} when it reported failures or nothing was scraped
     * @throws IOException if scraping or the Elasticsearch call fails
     */
    public Boolean parseContent(String keyword) throws IOException {
        List<Content> contents = HtmlParseUtil.parseJD(keyword);
        if (contents == null || contents.isEmpty()) {
            // An empty BulkRequest is rejected by the client's request validation
            // ("no requests added"), so skip the round trip when there is nothing to store.
            return false;
        }

        BulkRequest bulkRequest = new BulkRequest();
        // Bulk timeout; adjust to the actual workload.
        bulkRequest.timeout("2m");
        for (int i = 0; i < contents.size(); i++) {
            bulkRequest.add(
                    new IndexRequest(INDEX_NAME)
                            .id("" + (i + 1))
                            .source(JSON.toJSONString(contents.get(i)), XContentType.JSON)
            );
        }
        BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT);
        return !bulk.hasFailures();
    }

    /**
     * Runs a match query on the {@code name} field of {@code jd_goods} and returns
     * each hit's source map with {@code name} replaced by its highlighted form.
     *
     * @param keyword   text to match against {@code name}
     * @param pageIndex value passed unchanged to {@code SearchSourceBuilder.from}
     * @param pageSize  maximum number of hits to return
     * @return one source map per hit, in the order returned by Elasticsearch
     * @throws IOException if the Elasticsearch call fails
     */
    public List<Map<String, Object>> highlightSearch(String keyword, Integer pageIndex, Integer pageSize) throws IOException {
        SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

        // Query: full-text match on the product name.
        MatchQueryBuilder matchBuilder = QueryBuilders.matchQuery("name", keyword);
        searchSourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        searchSourceBuilder.query(matchBuilder);

        // Paging.
        // NOTE(review): from() is an offset counted in hits, not a page number. If callers
        // send a page number here, consecutive pages overlap - confirm the intended
        // contract (0- or 1-based page vs. raw offset) before changing this.
        searchSourceBuilder.from(pageIndex);
        searchSourceBuilder.size(pageSize);

        // Highlighting: wrap matched terms of "name" in a red span.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("name");
        highlightBuilder.preTags("<span style='color:red'>");
        highlightBuilder.postTags("</span>");
        searchSourceBuilder.highlighter(highlightBuilder);

        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);

        SearchHits hits = searchResponse.getHits();
        List<Map<String, Object>> results = new ArrayList<>();
        for (SearchHit hit : hits.getHits()) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            Map<String, HighlightField> highlightFields = hit.getHighlightFields();
            HighlightField name = highlightFields.get("name");
            if (name != null) {
                // Overwrite the stored name with the concatenated highlight fragments.
                Text[] fragments = name.fragments();
                StringBuilder highlightedName = new StringBuilder();
                for (Text fragment : fragments) {
                    highlightedName.append(fragment);
                }
                sourceAsMap.put("name", highlightedName.toString());
            }
            results.add(sourceAsMap);
        }
        return results;
    }
}
controller类:
import com.example.springbootes.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.ResponseBody;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Web entry points: the index page, a scrape-and-store endpoint, and a
 * highlighted search endpoint.
 */
@Controller
public class IndexController {

    @Autowired
    private ContentService contentService;

    /** Handles {@code GET /} and returns the string {@code "index"}. */
    @GetMapping("/")
    public String index() {
        return "index";
    }

    /**
     * Scrapes the products for {@code keyword} and stores them via
     * {@code ContentService.parseContent}.
     *
     * @param keyword search keyword taken from the path
     * @return the value returned by {@code parseContent}
     * @throws IOException propagated from the service
     */
    @GetMapping("/parse/{keyword}")
    @ResponseBody
    public Boolean parse(@PathVariable("keyword") String keyword) throws IOException {
        Boolean stored = contentService.parseContent(keyword);
        return stored;
    }

    /**
     * Re-scrapes and stores the products for {@code keyword}, then returns the
     * highlighted search results for the same keyword.
     *
     * @param keyword   search keyword taken from the path
     * @param pageIndex paging value forwarded to {@code highlightSearch}
     * @param pageSize  page size forwarded to {@code highlightSearch}
     * @return the list returned by {@code highlightSearch}
     * @throws IOException propagated from the service
     */
    @GetMapping("/h_search/{keyword}/{pageIndex}/{pageSize}")
    @ResponseBody
    public List<Map<String, Object>> highlightParse(@PathVariable("keyword") String keyword,
                                                    @PathVariable("pageIndex") Integer pageIndex,
                                                    @PathVariable("pageSize") Integer pageSize) throws IOException {
        // Refresh the index first; the outcome of the refresh is not inspected.
        contentService.parseContent(keyword);
        List<Map<String, Object>> highlighted = contentService.highlightSearch(keyword, pageIndex, pageSize);
        return highlighted;
    }
}
项目结果展示
项目源码:
es版本信息,springboot版本,跟前端相关资料都在源码里,需要自取
https://gitee.com/stackR/springboot-elasticsearch/
更多推荐
所有评论(0)