Elastic search sort error - search_phase_execution_exception - node.js

I am facing an issue with sorting values in an elastic search query. I am doing a simple search with sort but getting the following error. The query works without a sort parameter.
Elastic search client version: Version 7.6.1(Using this version because I am using opensearch)
search_phase_execution_exception: [illegal_argument_exception] Reason:
Text fields are not optimised for operations that require per-document
field data like aggregations and sorting, so these operations are
disabled by default. Please use a keyword field instead.
Alternatively, set fielddata=true on [subtype] in order to load field
data by uninverting the inverted index. Note that this can use
significant memory.
Code Sample:
const {Client} = require('#elastic/elasticsearch') // Version 7.6.1
var connectionString = 'https://admin:admin#localhost:9200'
const client = new Client({
node: connectionString,
ssl: {
rejectUnauthorized: false
}
})
client.info()
.then(async response => {
console.log('success', response.statusCode)
var query = {
"query": {
"match": {
"revhostname": {
"query": "ten.tsacmoc.ac.1dsh.631-651-14-37-c",
},
},
},
"sort": [
{
"revhostname": {"order": "asc"},
"subtype": {"order": "asc"},
"value": {"order": "asc"},
}
],
};
var response = await client.search({
index: 'r7',
body: query,
});
console.log("Search results:", JSON.stringify(response));
})
.catch(error => {
console.error('error', JSON.stringify(error))
})
Mapping:
{
"properties": {
"revhostname": {
"type" : "keyword"
},
"value": {
"type" : "keyword"
},
"subtype": {
"type" : "keyword"
},
"timestamp": {
"type" : "long"
},
"ip": {
"type" : "ip"
}
}
}
I tried adding fielddata=true in mapping but the issue was not solved. Your help is much appreciated.
Thank you.

As you mentioned mapping in a comment, your revhostname field is defined as text and keyword both type of field and Elasticsearch dont allow sorting on text type of field.
If your mapping is still same as you mentioned in comment then you need to use the field name like revhostname.keyword which will resolved issue.
const {Client} = require('#elastic/elasticsearch') // Version 7.6.1
var connectionString = 'https://admin:admin#localhost:9200'
const client = new Client({
node: connectionString,
ssl: {
rejectUnauthorized: false
}
})
client.info()
.then(async response => {
console.log('success', response.statusCode)
var query = {
"query": {
"match": {
"revhostname": {
"query": "ten.tsacmoc.ac.1dsh.631-651-14-37-c",
},
},
},
"sort": [
{
"revhostname.keyword": {"order": "asc"},
"subtype.keyword": {"order": "asc"},
"value.keyword": {"order": "asc"},
}
],
};
var response = await client.search({
index: 'r7',
body: query,
});
console.log("Search results:", JSON.stringify(response));
})
.catch(error => {
console.error('error', JSON.stringify(error))
})

Related

Index elastic problem to scroll with Typescript, and with node and Express

I use the Javascript client of elastic search to index and search my data and I have a problem to use the scroll method.
I can't set the right index but I know I have the right technique because in the DevTools console of Kibana I get the right result.
In DevTools I have the following queries that work (I execute the first request which is a search and then the second request which is a scroll by providing the scrollId returned by the first request):
GET my_index-*/_search?scroll=1m
{
"query": {
"bool": {
"filter": [{
"match": {
"field":"data"
}
},{
"range": {
"#timestamp": {
"gte": "2020-05-08T10:00:00.100Z",
"lte": "2022-05-11T10:00:00.200Z",
"format": "strict_date_optional_time"
}
}
}]
}
},
"_source": {
"includes": ["some_field1", "some_field2"]
},
"sort": [
{
"#timestamp": {
"order": "desc"
}
}
],
"size": 100
}
GET _search/scroll
{
"scroll": "1m",
"scroll_id":"DnF1ZXJ5VG ... NtUHdsQQ=="
}
These two requests return the expected result.
My backend is in Typescript and uses NodeJs with Express.
The first request, the search, works normally and returns me the right result. It is this code :
some import
...
const client = new Client({
node: configElasticClient.node
})
export const searchRouter = express.Router();
searchRouter.get('/search', async (req, res) => {
const response = await client.search<SearchResponse<HitResult>>({
index: "my_index-*",
scroll: "1m",
body: {
query: {
"bool": {
"filter": [{
"match": {
"field_to_search":"data_to_search"
}
},{
"range": {
"#timestamp": {
"gte": "2020-05-08T10:00:00.100Z",
"lte": "2022-05-11T10:00:00.200Z",
"format": "strict_date_optional_time"
}
}
}]
}
},
_source: ["some_field1", "some_field2"],
sort: [
{
"#timestamp": {
order: "desc"
}
}
],
size: 100
}
});
console.log("result: ", response);
res.json(response);
});
Then, when I make the scroll I should change my index to: "_search/scroll". But no matter what I try, when I try to access my route using scroll I get a "Cannot GET (the route)" error.
Here is the code of this second route (I put in comment the other way tried):
searchRouter.get('/search/scrollnext/:scrollId', async (req, res) => {
const response = await client.scroll<SearchResponse<HitResult>>({
scroll_id: req.params.scrollId,
scroll: "1m",
//index: "_search/scroll", wherever I put this field I get an error because the scroll method has no index field defined
});
//I also try this way :
//const response = await client.scroll<SearchResponse<HitResult>>({
// method: 'GET',
// body: {
// scroll_id: req.params.scrollId,
// scroll: "1m",
//}});
console.log("result: ", response);
res.json(response);
});
I get my scroll_id as a parameter from the first request which I fill in my second request in the scroll function.
I think that my problem comes from the scroll index which must be wrong. Does anyone know how to fix the index used by the scroll method?
Any help would be greatly appreciated :)
Finally find the answer, the syntax was good and it wasn't an index problem, I had to add an async management layer in my request like this:
searchRouter.get('/path/to/route/:scrollId', (asyncHandler(async (req, res) => {
const response = await client.scroll<YourDataType>({
scroll_id: req.params.scrollId,
scroll: "1m",
});
res.json(response);
})));
with asyncHandler looking something like this:
export function asyncHandler(fct: some_interface_to_define_request_caracteristic): RequestHandler {
return (req, res, next) => {
//Here add a try/catch layer
}
}

Firebase database collection returns empty array when trying to get all documents

I'm trying to get all documents from my database collection "posts" but I'm getting an empty array instead.
The strange thing is that I'm able to get all documents from another collection called "users" that has the same structure and using the exact same code.
I've spent days looking for an answer but I haven't been able to find the solution.
This is the request:
const db = admin.firestore();
exports.getAllPosts = (req, res) => {
db.collection('posts')
.orderBy('createdAt', 'desc')
.get()
.then(snapshot => {
let posts = [];
snapshot.forEach((doc) => {
posts.push({
id: doc.id,
body: doc.data().body,
author: doc.data().author,
createdAt: doc.data().timestamp,
voteScore: doc.data().voteScore
});
});
return res.json(posts);
})
.catch(err => {
console.error(err);
res.status(500).json({ error: err.code });
});
}
And this is the response:
[]
This is what my current collection looks like:
Posts collection screenshot
This the response that I get when I return "snapshot":
{
"_query": {
"_firestore": {
"_settings": {
"projectId": "readable-bf7a6",
"firebaseVersion": "9.6.0",
"libName": "gccl",
"libVersion": "4.10.0 fire/9.6.0"
},
"_settingsFrozen": true,
"_serializer": {
"allowUndefined": false
},
"_projectId": "readable-bf7a6",
"registeredListenersCount": 0,
"bulkWritersCount": 0,
"_backoffSettings": {
"initialDelayMs": 100,
"maxDelayMs": 60000,
"backoffFactor": 1.3
},
"_clientPool": {
"concurrentOperationLimit": 100,
"maxIdleClients": 1,
"activeClients": {},
"failedClients": {},
"terminated": false,
"terminateDeferred": {
"promise": {}
}
}
},
"_queryOptions": {
"parentPath": {
"segments": []
},
"collectionId": "posts",
"converter": {},
"allDescendants": false,
"fieldFilters": [],
"fieldOrders": [
{
"field": {
"segments": [
"createdAt"
]
},
"direction": "DESCENDING"
}
],
"kindless": false
},
"_serializer": {
"allowUndefined": false
},
"_allowUndefined": false
},
"_readTime": {
"_seconds": 1622395245,
"_nanoseconds": 513743000
},
"_size": 0,
"_materializedDocs": null,
"_materializedChanges": null
}
Notice how the request for the collection "users" works successfully:
const db = admin.firestore();
exports.getAllUsers = (req, res) => {
db.collection('users')
.orderBy('createdAt', 'desc')
.get()
.then(snapshot => {
let users = [];
snapshot.forEach((doc) => {
let users = [];
snapshot.forEach((doc) => {
users.push({
id: doc.data().userId,
email: doc.data().email,
handle: doc.data().handle
});
});
return res.json(users);
})
.catch(err => {
console.error(err);
res.status(500).json({ error: err.code });
});
}
And the response:
[
{
"id": "EPoHBxhQFUXbcL3TCVx1LdUG2nO2",
"email": "ruben#gmail.com"
},
{
"id": "RqEa3dEq8TSDcZYeolXafju67rB2",
"email": "user10#gmail.com"
},
{
"id": "dxveb4n2iMQej5Q14uprsKRxFp23",
"email": "user4#gmail.com",
"handle": "user4"
},
{
"id": "YQPzBPcsqlVZk9iJEuZTHKUNuVG2",
"email": "user2#gmail.com",
"handle": "user2"
},
{
"id": "CZ05BJxi3TUOpIrmBaz539OWlbC3",
"email": "user#gmail.com",
"handle": "user"
},
{
"id": "t0t83BVwt4gVgJkDv7HL1r1MaKr1",
"email": "userJose2#gmail.com",
"handle": "Jose"
}
]
This is what the users collection looks like in Firebase:
Users collection screenshot
Why is one collection failing when the other works fine and I'm using the same code? What am I missing here?
Thanks in advance and I hope I've made it as clear as possible. Please let me know if you need me to provide anything else.
Very simple my friend, your posts documents can't be ordered like this:
.orderBy('createdAt', 'desc')
Because the post documents does not have the createdAt property, but they have a timestamp property, you should use that property to order your posts like this:
.orderBy('timestamp', 'desc')
I hope that helps 👍
I am not answering directly to #Ruben Garcia Bri, but for future firebase developers who may run into the problem of getting empty documents, I also ran into the same problem, but I solved it by adding a field to the particular document I am trying to retrieve.
Sometimes the cause is because the documents you are trying to get have no field in them.
I mean that a document must have a field before the server can recognize it as an existing document.
So if you run into this problem, consider adding a field to that document before you can successfully retrieve it.

Get delta of nested array element in a document using change stream (Node.js)

I have a document that has an array field called telephone which can have multiple extension array objects that can have multiple objects in it. So its an array inside an array. I am listening to the db using changeStream. If I change telephone[0].extension[0].valueBoolean = true where telephone[0].extension[0].url == "https//google.com",
I get the whole telephone array back in change.updateDescription.updatedFields NOT just telephone[0].extension[0]
updatedFields
{
"telephone": [{
"use": "offline",
"extension": [
{
"url": "https//gmail.com",
"valueDateTime": "2021-01-12T06:31:48.000Z"
}, {
"url": "https//yahoo.com",
"valueDateTime": "1700-01-01T00:00:00.000Z"
}, {
"url": "https//google.com",
"TimeLastModified": "2021-02-23T11:06:06.000Z",
"valueBoolean": false
}],
"value": "+123456789",
"system": "phone"
}, {
"use": "internet",
"extension": [
{
"url": "https//gmail.com",
"valueDateTime": "2021-01-12T06:31:48.000Z"
}, {
"url": "https//yahoo.com",
"valueDateTime": "1700-01-01T00:00:00.000Z"
}, {
"url": "https//google.com",
"TimeLastModified": "2021-02-23T11:06:06.000Z",
"valueBoolean": false
}],
"value": "+123456799",
"system": "phone"
}]
}
Here's what i have so far
MongoClient.connect(CONNECTION_STRING, {
useUnifiedTopology: true,
})
.then((client) => {
console.log("Connected successfully to server");
dbConnected = true;
}
// specify db and collections
const db = client.db(DB_NAME);
const myCollection = db.collection(COLLECTION_NAME);
const options = { fullDocument: "updateLookup" };
const changeStream = myCollection.watch(options);
// start listening to changes
changeStream.on("change", async (change) => {
// console.log("CHANGE!");
// console.log(JSON.stringify(change));
// check operationType
try {
if (
change.operationType == "insert" ||
change.operationType == "update" ||
change.operationType == "replace"
) {
const updatedFields = change.updateDescription.updatedFields
console.log("updatedFields", JSON.stringify(change.updateDescription.updatedFields));
}
} catch (e) {
console.log(e);
}
});
})
.catch((e) => {
console.log(`Error: ${e}`);
});
How do I see what exact element in a nested array changed with changeStream ?
Unfortunately it seems that this is currently not supported - there's an open Jira-ticket that is related to your problem, see https://jira.mongodb.org/browse/SERVER-41559 for further details.

Conditionally adding term and multi_match filters to elasticsearch with nodejs client

I am using ElasticSearch 7.9 with a nodejs client. I have the following query :
{
"query": {
"bool": {
"must":[
{ "terms" : { "id" : ["5f0d06fb5112231eb89eb819", "5f0d06fb5112231eb89eb817"] } },
{"query_string": {
"query": "(News) OR (Entertainent)",
"fields": [ "topics", "subTopics", "categories"]
}
},
{
"multi_match": {
"query": "publisher",
"fields": ["text", "name", "title", "subtitle", "description"]
}
}
]
}
}
}
I want to be able to conditionally add the terms filter for id if list of ids coming in to the nodejs/js function is not empty. Similarly for the multi_match query text as well. Add the multi_filter only if the incoming text is not empty
Should all queries be pre-constructed or is it possible to have conditional blocks and add only if the empty text or array of Ids are not empty.
My current method expects both ids and text input to the method to have valid values but these could be empty. Do I separate methods to handle the empty conditions
export const searchResults = async (text, ids) => {
const response = await client.search({
index: "new_index", //customer.id
type: "_doc",
body: {
query: {
bool: {
must: [
{terms: {"id": ids}},
{query_string: {
query: "(News) OR (Entertainent)",
fields: [ "topics", "subTopics", "categories"]
}
},
{
multi_match: {
query: text,
fields: ["text", "name", "title", "subtitle", "description"],
}
}
],
}
}
},
});
return response?.hits?.hits.map(({_source}) => _source) || [];
};
any help is really appreciated.
In a production environment and managing client based applications you should use search-templates, where you can use conditional blocks of queries. Besides, if you would want to change your query you would not neet to redeploy your app, just change it on ES.

Rejecting mapping update error in firebase when pushing data to elastic search index

I am trying to use Firebase cloud functions to push data to my ElasticSearch index and am experiencing some error in the Firebase. Where could the problem be?
Here is my index.js function code
const functions = require('firebase-functions');
const request = require('request-promise')
exports.indexPostsToElastic = functions.database.ref('/posts/{post_id}')
.onWrite((change,context) =>{
let postData = change.after.val();
let post_id = context.params.post_id;
console.log('Indexing post',postData);
let elasticSearchConfig = functions.config().elasticsearch;
let elasticSearchUrl = elasticSearchConfig.url + 'posts/' + post_id;
let elasticSearchMethod = postData ? 'POST' : 'DELETE';
let elasticSearchRequest = {
method:elasticSearchMethod,
url: elasticSearchUrl,
auth:{
username : elasticSearchConfig.username,
password : elasticSearchConfig.password,
},
body: postData,
json : true
};
return request(elasticSearchRequest).then(response => {
return console.log("ElasticSearch response", response);
})
});
And below is the error am receiving in Firebase
StatusCodeError: 400 - {"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"Rejecting mapping update to [posts] as the final mapping would have more than 1 type: [_doc, -LcVpBay0SLV3c6fnpgt]"}],"type":"illegal_argument_exception","reason":"Rejecting mapping update to [posts] as the final mapping would have more than 1 type: [_doc, -LcVpBay0SLV3c6fnpgt]"},"status":400}
at new StatusCodeError (/user_code/node_modules/request-promise/node_modules/request-promise-core/lib/errors.js:32:15)
Here is my index code in postman
{
"mappings":{
"properties":{
"city":{
"type": "text"
},
"contact_email":{
"type": "text"
},
"country":{
"type": "text"
},
"description":{
"type": "text"
},
"image":{
"type": "text"
},
"post_id":{
"type": "text"
},
"state_province":{
"type": "text"
},
"title":{
"type": "text"
}
}
}
}
As the error message suggests, multiple mapping types have been removed.
https://www.elastic.co/guide/en/elasticsearch/reference/6.0/removal-of-types.html

Resources