es全文搜索策略

搜索需求:拼音搜索、汉字词组、英文词组、数字搜索
过滤字符:截取前50个字为关键词,超过长度的文本不计入有效关键词
匹配策略:
一个汉字或单词:完全匹配
关键词整体符合小写拼音全拼(可含空格):拼音搜索匹配+原字段通配符(包含)匹配
如果是全英文、空格、数字:完全匹配+通配符(包含)匹配
中文、其他:中文分词搜索+完全匹配

实现代码

    /** Matches a keyword that is exactly one character in the range U+4E00..U+9FA5. */
    private static final Pattern SINGLE_HANZI_PATTERN = Pattern.compile("^[\\u4e00-\\u9fa5]");

    /** Matches a keyword consisting only of whitespace and the lower-case pinyin syllables listed below. */
    private static final Pattern PINYIN_PATTERN = Pattern.compile("^(?!a-z)(\\s|a[io]?|ou?|e[inr]?|ang?|ng|[bmp](a[io]?|[aei]ng?|ei|ie?|ia[no]|o|u)|pou|me|m[io]u|[fw](a|[ae]ng?|ei|o|u)|fou|wai|[dt](a[io]?|an|e|[aeio]ng|ie?|ia[no]|ou|u[ino]?|uan)|dei|diu|[nl](a[io]?|ei?|[eio]ng|i[eu]?|i?ang?|iao|in|ou|u[eo]?|ve?|uan)|nen|lia|lun|[ghk](a[io]?|[ae]ng?|e|ong|ou|u[aino]?|uai|uang?)|[gh]ei|[jqx](i(ao?|ang?|e|ng?|ong|u)?|u[en]?|uan)|([csz]h?|r)([ae]ng?|ao|e|i|ou|u[ino]?|uan)|[csz](ai?|ong)|[csz]h(ai?|uai|uang)|zei|[sz]hua|([cz]h|r)ong|y(ao?|[ai]ng?|e|i|ong|ou|u[en]?|uan)){1,}");

    /** Matches a keyword consisting only of ASCII letters, digits and whitespace. */
    private static final Pattern ALNUM_SPACE_PATTERN = Pattern.compile("^[a-zA-Z0-9\\s]{1,}$");

    /**
     * Runs a full-text search over the {@code name} and {@code content} fields of the index.
     *
     * <p>The keyword is trimmed and cut to at most 50 characters, then one of four query shapes
     * is chosen:
     * <ul>
     *   <li>a single Chinese character: {@code match} on {@code name} / {@code content};</li>
     *   <li>only pinyin syllables and whitespace: {@code match} on the {@code .pinyin} sub-fields
     *       plus {@code wildcard} (contains) on the base fields;</li>
     *   <li>only ASCII letters, digits and whitespace: {@code match} plus {@code wildcard} on the
     *       base fields;</li>
     *   <li>anything else: {@code match} on the base fields and on the {@code .words} sub-fields.</li>
     * </ul>
     * Every query is restricted to documents whose {@code _id} is a key of
     * {@code fsFileService.getOtherColFidRoleMap(uid)} or whose {@code owner} equals {@code uid},
     * and returns at most 300 hits.
     *
     * @param keywords     raw user input; {@code null}, empty or whitespace-only yields an empty result
     * @param nameLimit    maximum length of the returned name; {@code null} or a value above
     *                     {@code Constants.MAX_FILE_NAME_LENGTH} falls back to that constant
     * @param contentLimit maximum length of the returned content; {@code null} or a value above 500
     *                     falls back to 500
     * @param uid          id of the searching user, used for the ownership filter
     * @return a success response wrapping the list of hit {@code _source} objects (with name and
     *         content replaced by highlighted or truncated text), or a failure response when the
     *         request to the search cluster throws an {@link IOException}
     */
    @Override
    public RtData search(String keywords, Integer nameLimit, Integer contentLimit, Long uid) {
        List<JSONObject> list = new ArrayList<>();
        if (StringUtils.isEmpty(keywords)) {
            return ResponseBuilder.success(list);
        }
        keywords = keywords.trim();
        if (keywords.isEmpty()) {
            // Whitespace-only input: nothing to search for, skip the round trip.
            return ResponseBuilder.success(list);
        }
        if (keywords.length() > 50) {
            keywords = keywords.substring(0, 50);
        }
        if (nameLimit == null || nameLimit > Constants.MAX_FILE_NAME_LENGTH) {
            nameLimit = Constants.MAX_FILE_NAME_LENGTH;
        }
        if (contentLimit == null || contentLimit > 500) {
            contentLimit = 500;
        }
        Map<String, String> fidRoleMap = fsFileService.getOtherColFidRoleMap(uid);
        List<String> fidList = new ArrayList<>(fidRoleMap.keySet());

        // Serialize the keyword as a JSON string literal (quotes included) so that quotes,
        // backslashes and control characters in user input cannot break or alter the query.
        String keywordJson = JSON.toJSONString(keywords);

        // Sub-field suffix used by the chosen strategy; empty when only the base fields are queried.
        String match = "";
        String shouldClauses;
        if (SINGLE_HANZI_PATTERN.matcher(keywords).matches()) {
            // A single Chinese character.
            shouldClauses = matchClause("name", keywordJson, 2)
                    + "," + matchClause("content", keywordJson, 1);
        } else if (PINYIN_PATTERN.matcher(keywords).matches()) {
            // Pinyin: query the pinyin sub-fields and also do a contains-match on the base fields.
            match = ".pinyin";
            String wildcardJson = JSON.toJSONString("*" + keywords.toLowerCase() + "*");
            shouldClauses = matchClause("name" + match, keywordJson, 2)
                    + "," + matchClause("content" + match, keywordJson, 1)
                    + "," + wildcardClause("name", wildcardJson, 2)
                    + "," + wildcardClause("content", wildcardJson, 1);
        } else if (ALNUM_SPACE_PATTERN.matcher(keywords).matches()) {
            // ASCII letters, digits and whitespace.
            String wildcardJson = JSON.toJSONString("*" + keywords.toLowerCase() + "*");
            shouldClauses = matchClause("name", keywordJson, 2)
                    + "," + matchClause("content", keywordJson, 1)
                    + "," + wildcardClause("content", wildcardJson, 2)
                    + "," + wildcardClause("name", wildcardJson, 4);
        } else {
            // Chinese text and everything else.
            match = ".words";
            shouldClauses = matchClause("name", keywordJson, 2)
                    + "," + matchClause("content", keywordJson, 1)
                    + "," + matchClause("name" + match, keywordJson, 5)
                    + "," + matchClause("content" + match, keywordJson, 2);
        }

        // Highlight fragments are requested at twice the display limit.
        String nameFragment = "{\"fragment_size\":" + nameLimit * 2 + "}";
        String contentFragment = "{\"fragment_size\":" + contentLimit * 2 + "}";
        String highlightFields = "\"name\":" + nameFragment + ",\"content\":" + contentFragment;
        if (!match.isEmpty()) {
            highlightFields += ",\"name" + match + "\":" + nameFragment
                    + ",\"content" + match + "\":" + contentFragment;
        }

        // Visibility filter: the document id is in fidList, or the document is owned by uid.
        String accessFilter = "{\"bool\":{\"should\":[{\"terms\":{\"_id\":" + JSON.toJSONString(fidList)
                + "}},{\"term\":{\"owner\":" + uid + "}}]}}";

        String queryStr = "{\"highlight\":{\"fields\":{" + highlightFields + "}},"
                + "\"query\":{\"bool\":{\"must\":[{\"bool\":{\"should\":[" + shouldClauses + "]}},"
                + accessFilter + "]}},"
                + "\"size\":300}";

        log.info(queryStr);
        Request request = new Request("GET", "/" + indexName + "/type/_search");
        try {
            // JSON query body
            request.setEntity(new NStringEntity(queryStr, ContentType.APPLICATION_JSON));
            Response response = restClient.performRequest(request);
            // UTF-8 is only the fallback for a response that declares no charset.
            String res = EntityUtils.toString(response.getEntity(), "UTF-8");
            log.info(res);
            JSONArray hits = JSON.parseObject(res).getJSONObject("hits").getJSONArray("hits");
            for (int i = 0; i < hits.size(); i++) {
                JSONObject hit = hits.getJSONObject(i);
                JSONObject source = hit.getJSONObject("_source");
                JSONObject highlight = hit.getJSONObject("highlight");
                if (highlight != null) {
                    applyHighlight(source, highlight, "name", match, nameLimit);
                    applyHighlight(source, highlight, "content", match, contentLimit);
                } else {
                    truncateField(source, "name", nameLimit);
                    truncateField(source, "content", contentLimit);
                }
                list.add(source);
            }
            return ResponseBuilder.success(list);
        } catch (IOException e) {
            log.error("Full-text search request failed", e);
        }
        return ResponseBuilder.fail();
    }

    /** Builds a {@code match} clause; {@code keywordJson} must already be a JSON string literal. */
    private static String matchClause(String field, String keywordJson, int boost) {
        return "{\"match\":{\"" + field + "\":{\"query\":" + keywordJson + ",\"boost\":" + boost + "}}}";
    }

    /** Builds a {@code wildcard} clause; {@code patternJson} must already be a JSON string literal. */
    private static String wildcardClause(String field, String patternJson, int boost) {
        return "{\"wildcard\":{\"" + field + "\":{\"value\":" + patternJson + ",\"boost\":" + boost + "}}}";
    }

    /**
     * Replaces {@code source[field]} with highlighted text, or with a truncated copy of the
     * stored value when the hit carries no highlight fragments for that field.
     */
    private void applyHighlight(JSONObject source, JSONObject highlight, String field, String match, Integer limit) {
        JSONArray plain = highlight.getJSONArray(field);
        JSONArray analyzed = highlight.getJSONArray(field + match);
        boolean pinyin = ".pinyin".equals(match);
        // Pinyin searches prefer the base-field fragments; all other searches prefer the sub-field.
        JSONArray fragments;
        if (pinyin) {
            fragments = plain != null ? plain : analyzed;
        } else {
            fragments = analyzed != null ? analyzed : plain;
        }
        if (fragments != null && !fragments.isEmpty()) {
            String text = "";
            for (Object fragment : fragments) {
                text = StringUtils.isEmpty(text) ? (String) fragment : (text + "..." + fragment);
            }
            text = text.replace("\n", " ");
            if (pinyin && plain == null) {
                // Fragments taken from the pinyin sub-field are returned without <em> markers.
                text = text.replace("<em>", "").replace("</em>", "");
            }
            source.put(field, getHighlightContent(text, limit));
        } else if (source.get(field) != null) {
            // No fragments: flatten newlines and keep at most the first `limit` characters.
            String value = source.getString(field).replace("\n", " ");
            source.put(field, value.substring(0, Math.min(value.length(), limit)));
        }
    }

    /** Cuts {@code source[field]} down to {@code limit} characters when it is longer than that. */
    private void truncateField(JSONObject source, String field, Integer limit) {
        String value = source.getString(field);
        if (!StringUtils.isEmpty(value) && value.length() > limit) {
            source.put(field, value.substring(0, limit));
        }
    }
发表新评论