es脚本(script)的简单使用

前言

最近因工作需要,要对es中的数据进行简单统计。笔者查阅资料后发现,可以借助script脚本来实现。下面以现有的products索引为例,其mapping(字段类型映射)和数据示例如下:

//数据结构mapping
{
  "products" : {
    "mappings" : {
      "properties" : {
        "id" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "name" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "num" : {
          "type" : "long"
        },
        "type" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    }
  }
}


//数据示例
{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 100,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "products",
        "_type" : "_doc",
        "_id" : "1d47cd3b9a444ae99763f7f84b954f85",
        "_score" : 1.0,
        "_source" : {
          "id" : "1d47cd3b9a444ae99763f7f84b954f85",	//id为简单的去掉-的UUID
          "name" : "商品0",	//商品名称
          "num" : 2,	//商品数量
          "type" : "1"	//商品类型,有“1”“2”“3”“4”共四种
        }
      }
    ]
  }
}

需求说明

对products索引中的数据进行简单统计:首先根据type进行分组,然后在每个type分组内,分别统计num(商品数量)大于等于10和小于10的商品(文档)各有多少个。

DSL

GET products/_search
{
  // Only aggregation results are needed, so no hits are returned.
  "size": 0,
  // Level-1 aggregation: one bucket per product type.
  "aggs": {
    "typeCount": {
      "terms": {
        // `type` is mapped as text and cannot be aggregated directly;
        // per the mapping, its `keyword` sub-field is of type keyword and can be.
        "field": "type.keyword",
        "size": 100
      },
      // Level-2 aggregation: inside every type bucket, separate the documents
      // whose num is >= 10 from those whose num is < 10.
      "aggs": {
        "t1": {
          "terms": {
            // Bucket keys are produced by a script. Besides `source`, a `lang`
            // property may be added; it defaults to painless.
            "script": {
              // A multi-line script is wrapped in a pair of triple double quotes;
              // a single-line script can use ordinary double quotes.
              "source": """
                // Reading .value on a document that has no num throws and fails
                // the whole request, so missing values get their own bucket.
                if (doc['num'].size() == 0) {
                  return 'num缺失';
                }
                // num is mapped as long, so it can be compared directly; a string
                // field would have to be of keyword type to be read this way.
                if (doc['num'].value >= 10) {
                  // The returned value becomes the key of the bucket.
                  return '大于等于10';
                }
                return '小于10';
              """
            }
          }
        }
      }
    }
  }
}

结果如下:

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 100,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "typeCount" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "4",
          "doc_count" : 31,
          "t1" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "大于等于10",
                "doc_count" : 17
              },
              {
                "key" : "小于10",
                "doc_count" : 14
              }
            ]
          }
        },
        {
          "key" : "2",
          "doc_count" : 24,
          "t1" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "小于10",
                "doc_count" : 13
              },
              {
                "key" : "大于等于10",
                "doc_count" : 11
              }
            ]
          }
        },
        {
          "key" : "1",
          "doc_count" : 23,
          "t1" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "小于10",
                "doc_count" : 14
              },
              {
                "key" : "大于等于10",
                "doc_count" : 9
              }
            ]
          }
        },
        {
          "key" : "3",
          "doc_count" : 22,
          "t1" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
              {
                "key" : "大于等于10",
                "doc_count" : 18
              },
              {
                "key" : "小于10",
                "doc_count" : 4
              }
            ]
          }
        }
      ]
    }
  }
}
Logo

为开发者提供学习成长、分享交流、生态实践、资源工具等服务,帮助开发者快速成长。

更多推荐