Filter data using nodejs and elasticsearch - node.js

I'm currently facing an issue with my datatable implemented in ReactJS. I'm retrieving data from elasticsearch and populating the datatable with it. The data retrieval process works fine without the filter applied, however, when I apply filters to the data, the datatable remains empty, even though the data _source has matching records.
The structure of the parameters I am sending is as follows:
{
pageIndex: 1,
pageSize: 10,
sort: { order: '', key: '' },
query: '',
filterData: {
analysis: [ '0', '1', '2', '3' ],
threat_level_id: [ '1', '2', '3', '4' ],
}
}
EndPoint:
POST /api/v1/events/public/list
Controller:
exports.getPublicEvents = async (req, res) => {
try {
client.ping()
const { pageIndex, pageSize, sort, query, filterData } = req.body
let esQuery = {
index: 'ns_*',
body: {
query: {
bool: {
must: [
{
match_all: {},
},
],
filter: [],
},
},
from: (pageIndex - 1) * pageSize,
size: pageSize,
},
}
if (query) {
esQuery.body.query.bool.must = [
{
match: {
'Event.info': {
query: query,
fuzziness: 'AUTO',
},
},
},
]
}
if (filterData.analysis.length > 0) {
esQuery.body.query.bool.filter.push({
terms: {
'Event.analysis': filterData.analysis,
},
})
}
if (filterData.threat_level_id.length > 0) {
esQuery.body.query.bool.filter.push({
terms: {
'Event.threat_level_id': filterData.threat_level_id,
},
})
}
let esResponse = await client.search(esQuery)
let data = esResponse.hits.hits.map((hit) => hit._source)
let total = esResponse.hits.total.value
res.status(200).json({
status: 'success',
data: data,
total: total,
})
} catch (error) {
res.status(500).json({
error: 'Error connecting to Elasticsearch',
errorMessage: error.message,
})
}
}
The controller below is without filters and it works just fine.
exports.getPublicEvents = async (req, res) => {
try {
client.ping()
const { pageIndex, pageSize, sort, query } = req.body
let esQuery = {
index: 'ns_*',
body: {
query: {
match_all: {},
},
from: (pageIndex - 1) * pageSize,
size: pageSize,
},
}
if (query) {
esQuery.body.query = {
match: {
'Event.info': {
query: query,
fuzziness: 'AUTO',
},
},
}
}
let esResponse = await client.search(esQuery)
let data = esResponse.hits.hits.map((hit) => hit._source)
let total = esResponse.hits.total.value
res.status(200).json({
status: 'success',
data: data,
total: total,
})
} catch (error) {
res.status(500).json({
error: 'Error connecting to Elasticsearch',
errorMessage: error.message,
})
}
}
Elasticsearch version: 7.17.8
Result of: console.log(JSON.stringify(esQuery))
{
"index": "INDEX_NAME",
"body": {
"query": {
"bool": {
"must": [{ "match_all": {} }],
"filter": [
{ "terms": { "Event.analysis": ["0", "1", "2"] } },
{ "terms": { "Event.threat_level_id": ["1", "2", "3", "4"] } }
]
}
},
"from": 0,
"size": 10
}
}
Structure of a document stored in Elasticsearch:
{
"#version": "1",
"#timestamp": "2023-02-01T14:43:09.997Z",
"Event": {
"info": ".......................",
"description": ".......................",
"analysis": 0,
"threat_level_id": "4",
"created_at": 1516566351,
"uuid": "5a64f74f0e543738c12bc973322",
"updated_at": 1675262417
}
}
Index Mapping
{
"index_patterns": ["INDEX_NAME"],
"template": "TEMPLATE_NAME",
"settings": {
"number_of_replicas": 0,
"index.mapping.nested_objects.limit": 10000000
},
"mappings": {
"dynamic": false,
"properties": {
"#timestamp": {
"type": "date"
},
"Event": {
"type": "nested",
"properties": {
"date_occured": {
"type": "date"
},
"threat_level_id": {
"type": "integer"
},
"description": {
"type": "text"
},
"is_shared": {
"type": "boolean"
},
"analysis": {
"type": "integer"
},
"uuid": {
"type": "text"
},
"created_at": {
"type": "date"
},
"info": {
"type": "text"
},
"shared_with": {
"type": "nested",
"properties": {
"_id": {
"type": "text"
}
}
},
"updated_at": {
"type": "date"
},
"author": {
"type": "text"
},
"Attributes": {
"type": "nested",
"properties": {
"data": {
"type": "text"
},
"type": {
"type": "text"
},
"uuid": {
"type": "text"
},
"comment": {
"type": "text"
},
"category": {
"type": "text"
},
"value": {
"type": "text"
},
"timestamp": {
"type": "date"
}
}
},
"organisation": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"uuid": {
"type": "text"
}
}
},
"Tags": {
"type": "nested",
"properties": {
"color": {
"type": "text"
},
"name": {
"type": "text"
}
}
},
"TLP": {
"type": "nested",
"properties": {
"color": {
"type": "text"
},
"name": {
"type": "text"
}
}
}
}
}
}
}
}

Event is a nested field, so you need to use nested queries, like this:
{
"index": "INDEX_NAME",
"body": {
"query": {
"bool": {
"must": [{ "match_all": {} }],
"filter": [
{
"nested": {
"path": "Event",
"query": {"terms": { "Event.analysis": ["0", "1", "2"] }}
}
},
{
"nested": {
"path": "Event",
"query": {"terms": { "Event.threat_level_id": ["1", "2", "3", "4"] }}
}
}
]
}
},
"from": 0,
"size": 10
}
}

Related

Filter nested result inside a nested object with elasticsearch

I'm trying to filter a nested object and sort by the result, but nothing I have tried has worked so far. My initial attempt is below. It only works partially: documents are selected according to my search variable, but every entry of the nested object is still returned, because that object sits inside the 'root' object, which is itself a nested object.
Elastic version: 7.13.0 with NodeJS
using the official @elastic/elasticsearch package from npm
// Document-level requirement: the document must own the requested profile.
const profileIdClause = {
  nested: {
    path: "profile",
    query: {
      bool: {
        must: [{ match: { "profile.id": profileId } }],
      },
    },
  },
};

// Prefix search over the name / username of the profiles being followed.
const followingClause = {
  nested: {
    path: "profile.following",
    ignore_unmapped: true,
    query: {
      query_string: {
        fields: ["profile.following.name", "profile.following.username"],
        query: searchWord + "*",
      },
    },
  },
};

// Search request: at most 30 hits, profile match in `must`, following match in `filter`.
let params: RequestParams.Search = {
  index: index,
  body: {
    size: 30,
    query: {
      bool: {
        must: [profileIdClause],
        filter: [followingClause],
      },
    },
  },
};
I need it to be this specific 'profile.id' that is passed by parameter in the function, so the result is only 1 profile with N people that it follows
the document is mapped as follows, I left only the fields relevant to the question:
{
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "integer"
},
"phone": {
"type": "text"
},
"profile": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"username": {
"type": "text"
},
"following": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"isAwaitingApproval": {
"type": "boolean"
},
"name": {
"type": "text"
},
"profilePicURL": {
"type": "text"
},
"username": {
"type": "text"
}
}
}
}
}
}
}
}
}
an example of a current result is:
with the following parameters (profileId:141, searchWord: "para" )
{
"res": [
{
"profilePicURL": "localimage",
"name": "donor donor",
"id": 140,
"username": "victorTesteElastic2",
"isAwaitingApproval": false
},
{
"profilePicURL": "localimage",
"name": "para ser seguido",
"id": 142,
"username": "victorprivate",
"isAwaitingApproval": true
}
]
}
the desired result is:
{
"res": [
{
"profilePicURL": "localimage",
"name": "para ser seguido",
"id": 142,
"username": "victorprivate",
"isAwaitingApproval": true
}
]
}
with some more research I got what I needed, I'll leave the answer here in case anyone needs it too
// Document-level requirement: the document must own the requested profile.
const profileIdClause = {
  nested: {
    path: "profile",
    query: {
      bool: {
        must: [{ match: { "profile.id": profileId } }],
      },
    },
  },
};

// Prefix search over the followed profiles; `inner_hits` exposes the matching
// `profile.following` entries under the name "following".
const followingClause = {
  nested: {
    path: "profile.following",
    inner_hits: {
      name: "following",
    },
    ignore_unmapped: true,
    query: {
      query_string: {
        fields: ["profile.following.name", "profile.following.username"],
        query: searchWord + "*",
      },
    },
  },
};

// The following search is nested inside a `profile` nested query whose own
// `inner_hits` are named "profile", so the matches are read from
// inner_hits.profile -> inner_hits.following in the response.
const profileWithFollowingClause = {
  nested: {
    path: "profile",
    inner_hits: {
      name: "profile",
    },
    query: followingClause,
  },
};

// Search request: at most 30 hits, both nested clauses required via `must`.
let params: RequestParams.Search = {
  index: index,
  body: {
    size: 30,
    query: {
      bool: {
        must: [profileIdClause, profileWithFollowingClause],
      },
    },
  },
};
Basically, I moved what was in the filter into must, wrapped it in a nested query on the parent object (in this case profile), and added inner_hits to both the profile query and the following query. That was the only way I got it to work.
the answer I need was returned here:
body.hits.hits[0].inner_hits.profile.hits.hits[0].inner_hits.following.hits.hits
below is an example of the answer:
{
"res": [
{
"_index": "donor",
"_type": "_doc",
"_id": "P3VWNnsB4coAEhD-F3fF",
"_nested": {
"field": "profile",
"offset": 0,
"_nested": {
"field": "following",
"offset": 0
}
},
"_score": 1,
"_source": {
"profilePicURL": "localimage",
"name": "donor donor",
"id": 140,
"username": "victorTesteElastic2",
"isAwaitingApproval": false
}
},
{
"_index": "donor",
"_type": "_doc",
"_id": "P3VWNnsB4coAEhD-F3fF",
"_nested": {
"field": "profile",
"offset": 0,
"_nested": {
"field": "following",
"offset": 1
}
},
"_score": 1,
"_source": {
"profilePicURL": "localimage",
"name": "para ser seguido",
"id": 142,
"username": "victorprivate",
"isAwaitingApproval": true
}
}
]
}
the filtered data I really need that have been matched in must is in this array, where I need to iterate and look at _source which is the data that is indexed

ElasticSearch query works on dev console but fails in NodeJS instance

I have the following ElasticSearch query that used to retrieve a bunch of pages (search) paginated using a cursor offset.
Query
GET _search
{
"search_after": [
1.8574909,
"urn:sample/78PsC1EHG6nopQCA/n/749d1ed1-d08d-44a1-abac-9ebad8c76697"
],
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_id": {
"order": "asc"
}
}
],
"size": 1,
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"type": "node"
}
},
{
"query_string": {
"query": "a",
"fields": [
"node.text.string",
"node.text.string.english"
]
}
}
]
}
}
]
}
}
],
"filter": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"workspace.type": "space"
}
},
{
"terms": {
"workspace.id": [
"72MsGpeV9zGu5ytZ"
]
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"workspace.type": "user"
}
},
{
"term": {
"workspace.id": "8"
}
}
]
}
}
]
}
}
}
},
"highlight": {
"fields": {
"comment.body.text": {},
"comment.body.text.english": {},
"node.text.string": {},
"node.text.string.english": {}
}
}
}
The query works fine in the dev console and returns a hit with an edge.
Result
{
"took" : 136,
"timed_out" : false,
"num_reduce_phases" : 2,
"_shards" : {
"total" : 929,
"successful" : 929,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : null,
"hits" : [
{
"_index" : "sample-development-john",
"_type" : "_doc",
"_id" : "urn:sample:d/78PsC1EHG6nopQCA/n/4230ca5a-5e1e-48ee-a70f-95f7b2d09995",
"_score" : 1.7747711,
"_source" : {
"document_id" : "78PsC1EHG6nopQCA",
"type" : "node",
"workspace" : {
"type" : "space",
...
However, performing the same query through my NodeJS resolver using the Javascript ES API fails when I copy the exact same query in my code.
SearchResolver.js
// parse the cursor argument for pagination.
search_after = JSON.parse(Buffer.from(args.after, 'base64').toString('ascii'));
const response: ApiResponse<Taskade.Backend.Elasticsearch.SearchBody> = await client.search({
index,
body: {
search_after: [
1.8574909,
'urn:taskade:d/78PsC1EHG6nopQCA/n/749d1ed1-d08d-44a1-abac-9ebad8c76697',
],
sort: [
{
_score: {
order: 'desc',
},
},
{
_id: {
order: 'asc',
},
},
],
size: 1,
query: {
bool: {
must: [
{
bool: {
should: [
{
bool: {
must: [
{
term: {
type: 'node',
},
},
{
query_string: {
query: 'a',
fields: ['node.text.string', 'node.text.string.english'],
},
},
],
},
},
],
},
},
],
filter: {
bool: {
should: [
{
bool: {
must: [
{
term: {
'workspace.type': 'space',
},
},
{
terms: {
'workspace.id': ['72MsGpeV9zGu5ytZ'],
},
},
],
},
},
{
bool: {
must: [
{
term: {
'workspace.type': 'user',
},
},
{
term: {
'workspace.id': '8',
},
},
],
},
},
],
},
},
},
},
highlight: {
fields: {
'comment.body.text': {},
'comment.body.text.english': {},
'node.text.string': {},
'node.text.string.english': {},
},
},
},
}
Error
{
"errors": [
{
"message": "parsing_exception",
"locations": [
{
"line": 2,
"column": 3
}
],
"path": [
"search"
],
"extensions": {
"code": "INTERNAL_SERVER_ERROR",
"exception": {
"name": "ResponseError",
"meta": {
"body": {
"error": {
"root_cause": [
{
"type": "parsing_exception",
"reason": "Unknown key for a VALUE_STRING in [query].",
"line": 1,
"col": 10
}
],
"type": "parsing_exception",
"reason": "Unknown key for a VALUE_STRING in [query].",
"line": 1,
"col": 10
},
"status": 400
},
"statusCode": 400,
"headers": {
"date": "Mon, 28 Dec 2020 11:58:05 GMT",
"content-type": "application/json; charset=UTF-8",
"content-length": "239",
"connection": "keep-alive",
"access-control-allow-origin": "*"
},
"warnings": null,
"meta": {
"context": null,
"request": {
"params": {
"method": "POST",
"path": "/sample-development-john/_search",
"body": "{\"query\":\"a\",\"size\":1,\"sort\":[{\"_score\":{\"order\":\"asc\"}},{\"_id\":{\"order\":\"desc\"}}],\"search_after\":[1.7747711,\"urn:sample:d/78PsC1EHG6nopQCA/n/4230ca5a-5e1e-48ee-a70f-95f7b2d09995\"]}",
"querystring": "",
"headers": {
"User-Agent": "elasticsearch-js/6.8.7 (darwin 20.2.0-x64; Node.js v10.21.0)",
"Content-Type": "application/json",
"Content-Length": "182"
},
"timeout": 30000
},
"options": {
"warnings": null
},
"id": 2
},
"name": "elasticsearch-js",
"connection": {
"url": "https://search-sample-dev-526nv5wyqxj6ahzcql2cyndz5e.us-east-1.es.amazonaws.com/",
"id": "https://search-sample-dev-526nv5wyqxj6ahzcql2cyndz5e.us-east-1.es.amazonaws.com/",
"headers": {},
"deadCount": 0,
"resurrectTimeout": 0,
"_openRequests": 0,
"status": "alive",
"roles": {
"master": true,
"data": true,
"ingest": true,
"ml": false
}
},
"attempts": 0,
"aborted": false
}
},
"stacktrace": [
"ResponseError: parsing_exception",
" at IncomingMessage.response.on (/Users/john/dev/sample/ft/node_modules/#elastic/elasticsearch/lib/Transport.js:296:25)",
" at IncomingMessage.emit (events.js:203:15)",
" at IncomingMessage.EventEmitter.emit (domain.js:466:23)",
" at endReadableNT (_stream_readable.js:1145:12)",
" at process._tickCallback (internal/process/next_tick.js:63:19)"
]
}
}
}
],
"data": {
"search": null
}
}
This error occurs when I pass the after (search_after) parameter to my GraphQL call, despite the values for the query all being hardcoded (including search_after as seen in the first Query snippet).
// query will work if `after` is not passed. The values here don't matter since for testing purposes, all values are hardcoded in the resolver.
query {
search(after: "WzEuODU3NDkwOSwidXJuOnRhc2thZGU6ZC83OFBzQzFFSEc2bm9wUUNBL24vNzQ5ZDFlZDEtZDA4ZC00NGExLWFiYWMtOWViYWQ4Yzc2Njk3Il0=", first:1, filterby:{query:"a"}){
edges{
cursor
node {
...
Why does the query work in the dev console but fail in my resolver when I pass the after parameter (which becomes search_after once the JSON is parsed) to the GraphQL call, even though the query is hardcoded and identical in both places?

Get suggest simple_phrase query values returns "undefined"

How to get suggested query values from elasticsearch nodejs client.
I have such kind of index named test :
PUT test
{
"settings": {
"index": {
"number_of_shards": 1,
"analysis": {
"analyzer": {
"trigram": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase","shingle"]
},
"reverse": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase","reverse"]
}
},
"filter": {
"shingle": {
"type": "shingle",
"min_shingle_size": 2,
"max_shingle_size": 3
}
}
}
}
},
"mappings": {
"properties": {
"word": {
"type": "text",
"fields": {
"trigram": {
"type": "text",
"analyzer": "trigram"
},
"reverse": {
"type": "text",
"analyzer": "reverse"
}
}
}
}
}
}
and want to take suggest.simple_phrase values by the help of elasticsearch client by this way:
/**
 * Resolver that asks the `test` index for phrase suggestions and returns the
 * `simple_phrase` entry of the suggest response.
 *
 * @param _ unused first resolver argument
 * @param params resolver arguments; `params.searchText` is the text to get suggestions for
 * @param context unused
 * @returns a promise resolving to `result.body.suggest.simple_phrase`
 * @throws rethrows any error raised by `client.search` after logging it
 */
getElasticSearchResult: async (_, params, context) => {
  const param: RequestParams.Search = {
    index: 'test',
    body: {
      suggest: {
        // Send the caller's text. The previous version passed the literal
        // string 'params.searchText' instead of the argument's value.
        text: params.searchText,
        simple_phrase: {
          phrase: {
            field: 'word.trigram',
            size: 1,
            gram_size: 3,
            direct_generator: [
              {
                field: 'word.trigram',
                suggest_mode: 'always',
              },
            ],
            highlight: {
              pre_tag: '<em>',
              post_tag: '</em>',
            },
          },
        },
      },
    },
  };

  try {
    // `client.search` already returns a promise, so it is awaited directly
    // rather than wrapped in `new Promise`. The previous version also logged
    // the return value of `resolve(...)`, which is always undefined.
    const result: ApiResponse = await client.search(param);
    return result.body.suggest.simple_phrase;
  } catch (err) {
    console.log(err);
    throw err;
  }
},
},
But result.body is logged as undefined. Why?
The request is sent correctly and the structure of the result is also correct, but everything inside it is empty.

ElasticSearch JS Client Search by

I'm getting started with ElasticSearch and I'm having trouble understanding how searching is supposed to work.
First, I have this two documents:
{
"took": 133
"timed_out": false
"_shards": {
"total": 5
"successful": 5
"failed": 0
}
"hits": {
"total": 2
"max_score": 1
"hits": [2]
0: {
"_index": "app"
"_type": "player"
"_id": "AVcLCOgAi_gt2Fih02MK"
"_score": 1
"_source": {
"nickName": "sarasa"
"birthDate": "1994-11-05T13:15:30.000Z"
"state": "sarasa"
"adminState": "sarasa"
"id": ""
"account": {
"type": "yojuego"
"id": "asasdfa"
"password": "asd fasd"
}
}
}
1: {
"_index": "app"
"_type": "player"
"_id": "AVcQ7JNVi_gt2Fih02MN"
"_score": 1
"_source": {
"nickName": "facundo"
"birthDate": "1994-11-05T13:15:30.000Z"
"state": "verdura"
"adminState": "sudo"
"id": ""
"account": {
"type": "yojuego"
"id": "facundo#facundo"
"password": "pepe"
}
}
}
}
}
}
I want to get where account.id = "facundo#facundo" and account.type = "yojuego".
I'm doing this:
// Find the player whose account has id "facundo#facundo" and type "yojuego".
client.search({
    index: 'app',
    type: 'player',
    // The query DSL has to be passed under `body`. When `query` is given as a
    // top-level parameter the search is not restricted by it, which is why
    // every document in the index was coming back.
    body: {
        query: {
            // `account` is declared as a nested field in the mapping, so its
            // sub-fields are reached through a nested query. NOTE(review): this
            // assumes that mapping is really applied to the 'app' index.
            nested: {
                path: 'account',
                query: {
                    bool: {
                        must: [
                            // `match` instead of `term`: the fields are analyzed
                            // strings, so the raw value is not stored as one token.
                            { match: { 'account.id': 'facundo#facundo' } },
                            { match: { 'account.type': 'yojuego' } }
                        ]
                    }
                }
            }
        }
    }
}, (error, response, status) => {
    if (error) {
        // The callback parameter is `error`; `err` was not defined here.
        res.json(400, error);
    }
    else {
        res.json(200, response.hits.hits);
    }
});
This search is retrieving all documents I have into the index.
Any help?
Thanks!
PS: Here is how I created the index and the mapping:
// Creates the index and then registers the `player` mapping, answering the
// HTTP request with the putMapping response (200) or with the error (500).
// NOTE(review): the index created here is 'yojuego' while the mapping is put
// on 'app' -- confirm which name is intended; both are left unchanged.
client.indices.create({ index: 'yojuego' }, (err, resp, respcode) => {
    if (err) {
        // Previously nothing was sent back on failure, leaving the request hanging.
        res.json(500, err);
        return;
    }
    client.indices.putMapping({
        index: 'app',
        type: "player",
        body: {
            properties: {
                nickName: { type: "string" },
                birthDate: { type: "string" },
                state: { type: "string" },
                adminState: { type: "string" },
                account: {
                    type: "nested",
                    properties: {
                        id: { type: "string" },
                        type: { type: "string" },
                        password: { type: "string" }
                    }
                }
            }
        }
    }, (mappingErr, mappingResp, mappingRespcode) => {
        if (mappingErr) {
            // Previously a failed putMapping was still answered with 200.
            res.json(500, mappingErr);
            return;
        }
        res.json(200, mappingResp);
    });
});
Make sure that account is mapped as a nested field, and then use this query:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "account",
"query": {
"bool": {
"must": [
{
"match": {
"account.id": "facundo#facundo"
}
},
{
"match": {
"account.type": "yojuego"
}
}
]
}
}
}
}
]
}
}
}

ElasticSearch query stops working with big amount of data

The problem: I have 2 identical in terms of settings and mappings indexes.
The first index contains only 1 document.
The second index contains the same document + 16M of others.
When I'm running the query on the first index it returns the document, but when I do the same query on the second — I receive nothing.
Indexes settings:
{
"tasks_test": {
"settings": {
"index": {
"analysis": {
"analyzer": {
"tag_analyzer": {
"filter": [
"lowercase",
"tag_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"tag_filter": {
"type": "word_delimiter",
"type_table": "# => ALPHA"
}
}
},
"creation_date": "1444127141035",
"number_of_replicas": "2",
"number_of_shards": "5",
"uuid": "wTe6WVtLRTq0XwmaLb7BLg",
"version": {
"created": "1050199"
}
}
}
}
}
Mappings:
{
"tasks_test": {
"mappings": {
"Task": {
"dynamic": "false",
"properties": {
"format": "dateOptionalTime",
"include_in_all": false,
"type": "date"
},
"is_private": {
"type": "boolean"
},
"last_timestamp": {
"type": "integer"
},
"name": {
"analyzer": "tag_analyzer",
"type": "string"
},
"project_id": {
"include_in_all": false,
"type": "integer"
},
"user_id": {
"include_in_all": false,
"type": "integer"
}
}
}
}
}
The document:
{
"_index": "tasks_test",
"_type": "Task",
"_id": "1",
"_source": {
"is_private": false,
"name": "135548- test with number",
"project_id": 2,
"user_id": 1
}
}
The query:
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
[
{
"match": {
"_all": {
"query": "135548",
"type": "phrase_prefix"
}
}
}
]
]
}
},
"filter": {
"bool": {
"must": [
{
"term": {
"is_private": false
}
},
{
"terms": {
"project_id": [
2
]
}
},
{
"terms": {
"user_id": [
1
]
}
}
]
}
}
}
}
}
Also, some findings:
if I replace _all with name everything works
if I replace match_phrase_prefix with match_phrase, it works too
ES version: 1.5.1
So, the question is: how to make the query work for the second index without mentioned hacks?

Resources