Aggregate on aggregate filter in Logstash? - logstash

I have a stock table where I have stocks for shops/products:
input {
jdbc {
statement => "SELECT ShopId, ProductCode, Quantity FROM stock ORDER BY productcode;"
}
}
then I have a simple filter to aggregate that data:
filter {
aggregate {
task_id => "%{productcode}"
code => "
map['productcode'] ||= event.get('productcode')
map['objectID'] ||= event.get('productcode')
map['stocks'] ||= []
map['stocks'] << {
'ShopId' => event.get('ShopId'),
'quantity' => event.get('quantity'),
}
event.cancel()
"
push_previous_map_as_event => true
timeout => 3
}
}
which gives me the output I expect, for example:
{
"productcode": "123",
"objectID": "123",
"stocks": [
{
"ShopId": 1
"Quantity": 2
},
{
"ShopId": 2
"Quantity": 5
}
]
}
Now I can push that data to Algolia via the http output plugin.
But the issue I have is that there are thousands of objects, which makes thousands of calls.
That's why I'm thinking of using the batch endpoint and packing those objects into batches of, e.g., 1000 objects, but to do so I need to adjust the structure to:
{
"requests": [
{
"action": "addObject",
"body": {
"productcode": "123",
"objectID": "123",
...
}
},
{
"action": "addObject",
"body": {
"productcode": "456",
"objectID": "456",
...
}
}
]
}
which looks to me like another aggregate step, and I already tried:
aggregate {
task_id => "%{source}"
code => "
map['requests'] ||= []
map['requests'] << {
'action' => 'addObject',
'body' => {
'productcode' => event.get('productcode'),
'objectId' => event.get('objectID'),
'stocks' => event.get('stocks')
}
}
event.cancel()
"
push_previous_map_as_event => true
timeout => 3
}
but it does not work.
Also, with this kind of aggregate filter I'm not able to configure how big the batches I send to the batch output should be.
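One idea I'm considering (an untested sketch, not a working config: the ruby counter, the batch_id field, and the batch size of 1000 are all my assumptions) is to assign a batch number with a ruby filter and then aggregate on it, so a batch event gets pushed whenever the batch number changes:
ruby {
  # running counter across events; like the aggregate filters, this assumes a single pipeline worker (-w 1)
  init => "@count = 0"
  code => "
    @count += 1
    event.set('batch_id', @count / 1000)
  "
}
aggregate {
  # a new map (and therefore a new batch event) is pushed each time batch_id changes
  task_id => "%{batch_id}"
  code => "
    map['requests'] ||= []
    map['requests'] << {
      'action' => 'addObject',
      'body' => {
        'productcode' => event.get('productcode'),
        'objectID' => event.get('objectID'),
        'stocks' => event.get('stocks')
      }
    }
    event.cancel()
  "
  push_previous_map_as_event => true
  timeout => 3
}
Would something along these lines be a reasonable way to control the batch size, or is there a better approach?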
I will be very grateful for any help or clues.

Related

elasticsearch node.js API remove an object from an array on a document using painless script results in array Index Out of Bounds

I want to remove items (an object) from an array on a document in Elasticsearch; however, whenever I try to run my update script using Painless, I receive an Array Index Out of Bounds exception.
I'm using the JavaScript elasticsearch npm package to search Elasticsearch for the relevant documents, which returns data like:
"_index": "centres",
"_type": "doc",
"_id": "51bc77d1-b514-4f4e-85fa-412def6829f5",
"_score": 1,
"_source": {
"id": "cbaa7daa-f1a2-4ac3-8d7c-fc981245d21c",
"name": "Five House",
"openDays": [
{
"title": "new open Day",
"endDate": "2022-03-22T00:00:00.000Z",
"id": "82be934b-eeb1-419c-96ed-a58808b30df7"
},
{
"title": "last open Day",
"endDate": "2020-12-24T00:00:00.000Z",
"id": "8cc339b9-d2f8-4252-b68a-ed0a49cbfabd"
}
]
}
I then want to go through and remove certain items from the openDays array. I've created an array of the items I want to remove, so for the above example:
[
{
id: '51bc77d1-b514-4f4e-85fa-412def6829f5',
indexes: [
{
"title": "last open Day",
"endDate": "2020-12-24T00:00:00.000Z",
"id": "8cc339b9-d2f8-4252-b68a-ed0a49cbfabd"
}
]
}
]
I'm then trying to run an update via the elasticsearch node client like this:
for (const centre of updates) {
if (centre.indexes.length) {
await Promise.all(centre.indexes.map(async (theIndex) => {
const updated = await client.update({
index: 'centres',
type: 'doc',
id: centre.id,
body: {
script: {
lang: 'painless',
source: "ctx._source.openDays.remove(ctx._source.openDays.indexOf('openDayID'))",
params: {
"openDayID": theIndex.id
}
}
}
}).catch((err) => {throw err;});
}))
.catch((err) => {throw err;});
await client.indices.refresh({ index: 'centres' }).catch((err) => { throw err;});
}
}
When I run this though, it returns a 400 with an "array_index_out_of_bounds_exception" error:
-> POST http://localhost:9200/centres/doc/51bc77d1-b514-4f4e-85fa-412def6829f5/_update
{
"script": {
"lang": "painless",
"source": "ctx._source.openDays.remove(ctx._source.openDays.indexOf(\u0027openDayID\u0027))",
"params": {
"openDayID": "8cc339b9-d2f8-4252-b68a-ed0a49cbfabd"
}
}
}
<- 400
{
"error": {
"root_cause": [
{
"type": "remote_transport_exception",
"reason": "[oSsa7mn][172.17.0.2:9300][indices:data/write/update[s]]"
}
],
"type": "illegal_argument_exception",
"reason": "failed to execute script",
"caused_by": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [],
"script": "ctx._source.openDays.remove(ctx._source.openDays.indexOf(\u0027openDayID\u0027))",
"lang": "painless",
"caused_by": {
"type": "array_index_out_of_bounds_exception",
"reason": null
}
}
},
"status": 400
}
I'm not quite sure where I'm going wrong with this. Am I using the indexOf painless script correctly? Does indexOf allow for the searching of properties on objects in arrays?
I stumbled across this question and answer: Elasticsearch: Get object index with Painless script
The body of the update script needs changing like so:
Promise.all(...
const inline = `
def openDayID = '${theIndex.id}';
def openDays = ctx._source.openDays;
def openDayIndex = -1;
for (int i = 0; i < openDays.length; i++)
{
if (openDays[i].id == openDayID)
{
openDayIndex = i;
}
}
if (openDayIndex != -1) {
ctx._source.openDays.remove(openDayIndex);
}
`;
const updated = await client.update({
index: 'centres',
type: 'doc',
id: centre.id,
body: {
script: {
lang: 'painless',
inline: inline,
},
}
}).catch((err) => {throw err;});
await client.indices.refresh({ index: 'centres' }).catch((err) => { throw err;});
})).catch(... //end of Promise.all
I am not au fait with Painless scripting, so there are most likely better ways of writing this, e.g. breaking out of the loop once the index of the ID is found.
I have also had to move the refresh statement into the Promise.all since if you're trying to remove more than one item from the array of objects, you'll be changing the document and changing the index. There is probably a better way of dealing with this too.
In the original script, the string literal 'openDayID' should be params.openDayID.
And use removeIf:
"ctx._source.openDays.removeIf(el -> (el.id == params.openDayID))"

Add key on each json object in Promise

I have the following Promise and Promise.all which works and returns a JSON object. However, I want to add a key for each returned object.
As of now, it returns something like:
[value: {school object}, value:{students object}, value:{classroom object}]
desired output:
["schools": {school object }, {students object} , {classroom object} ]
Current Implementation:
new Promise((resolve, reject) => {
const school = getschool (webHost, dataSource, req);
const classRooms = getClassRooms(webHost, dataSource, req);
const students = getstudents (webHost,dataSource, req);
Promise.all([school ,classRooms,students ]).then((res) => {
resolve(res);
})
.catch((error) => {
logger.error(`${error}`);
reject(error);
});
});
classroom
{
"metadata": "metadata",
"value": [
{
"class_id": "171717",
"teacher_name": "Science"
}
]
}
School object
{
"metadata": "metadata",
"value": [
{
"id": "2345354",
"schoolName": "Memorial High School"
}
]
}
Student json
{
"metadata": "metadata",
"value": [
{
"id": "1234",
"studentName": "Beck"
},
{
"id": "5678",
"studentName": "Jeck"
}
]
}
Desired Output:
[
{
"class_id":"171717",
"teacher_name":"Science",
"id":"2345354",
"schoolName":"Memorial High School",
"Students":[
{
"id":"1234",
"studentName":"Beck"
},
{
"id":"5678",
"studentName":"Jeck"
}
]
}
]
It seems like you want to merge the objects to make one unified object of a custom type; here is what you want to do:
res => resolve(
{
...res[1].value[0],
...res[0].value[0],
Students: res[2].value
}
)
The ... is called spread syntax. It "explodes" the objects and arrays. What we want here is to get the internals of classRooms.value[0], merge them with the internals of school.value[0], and then add another attribute at the same level with the key Students, which is the non-exploded array given by students.value.
Here I have created a small TS Playground example for you to play with the syntax and modify the output the way you see fit.
If you run it, it prints the desired output:
{
"class_id": "171717",
"teacher_name": "Science",
"id": "2345354",
"schoolName": "Memorial High School",
"Students": [
{
"id": "1234",
"studentName": "Beck"
},
{
"id": "5678",
"studentName": "Jeck"
}
]
}
-- ORIGINAL ANSWER --
Promise.all returns a promise of the resolved values in an array. So, the .then callback takes a parameter that is an array of the resolved objects in the same order. That means your res parameter is an array of the school, classRooms, and students objects, in that order. You can do the following to "zip" them up:
new Promise((resolve, reject) => {
const school = getschool (webHost, dataSource, req);
const classRooms = getClassRooms(webHost, dataSource, req);
const students = getstudents (webHost,dataSource, req);
Promise.all([school ,classRooms,students ]).then((res) => {
resolve({"schools": res[0], "classRooms" : res[1], "students": res[2]});
})
.catch((error) => {
logger.error(`${error}`);
reject(error);
});
});
or even better,
.then(([schools, classRooms, students]) => {
resolve({schools, classRooms, students});
})

How to merge multiple jdbc inputs in logstash configuration based on condition of single column?

I have two tables in SQL Server, i.e. AppDetails and AppBranchDetails.
I want to read all rows of these two tables and merge them based on a condition.
Below are the two queries which I want to run:
select id as colg_id, name, sirname from AppDetails order by id
select id as branch_id, branch_name, branch_add from AppBranchDetails order by id
In the above two queries, "id" is a primary key which is the same for both tables.
The output should look like below for id == 1:
{
"name": "ram",
"sirname": "patil",
"id": 1,
"BRANCH": [
{
"id": 1,
"branch_name": "EE",
"branch_add": "IND"
},
{
"id": 1,
"branch_name": "ME",
"branch_add": "IND"
}
]
}
The output should look like below for id == 2:
{
"name": "sham",
"sirname": "bhosle",
"id": 2,
"BRANCH": [
{
"id": 2,
"branch_name": "SE",
"branch_add": "US"
},
{
"id": 2,
"branch_name": "FE",
"branch_add": "US"
}
]
}
I am trying with below configuration (app.conf):
input {
jdbc {
jdbc_connection_string => "jdbc:sqlserver://x.x.x.x:1433;databaseName=AAA;"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_user => "sa"
jdbc_password => "sa#111"
statement => "select id as colg_id, name, sirname from AppDetails order by id"
tracking_column => "colg_id"
use_column_value => true
type => "college"
}
jdbc {
jdbc_connection_string => "jdbc:sqlserver://x.x.x.x:1433;databaseName=AAA;"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_user => "sa"
jdbc_password => "sa#111"
statement => "select id as branch_id, branch_name, branch_add from AppBranchDetails order by id"
tracking_column => "branch_id"
use_column_value => true
type => "branch"
}
}
filter {
if [type] == "branch" {
aggregate {
task_id => "%{branch_id}"
code => "
map['BRANCH'] ||= []
map['BRANCH'] << event.get('branch_id')
map['BRANCH'] << event.get('branch_name')
map['BRANCH'] << event.get('branch_add')
event.cancel()
"
push_previous_map_as_event => true
timeout => 5
}
mutate {
remove_field => [ "@version" , "@timestamp" ]
}
}
}
output {
stdout { codec => json_lines }
}
Can anyone please suggest how I can produce the result I have mentioned above?

Elasticsearch need to search from multiple types with type specific where clause

I have an ES index with multiple types, and each type has its own filter parameters. Now we are building a global search on ES across multiple types, and I am a bit confused about how to include a type-specific where clause in NEST.
Elastic Search
-> Type 1 (where x=1)
-> Type 2 (where y=1)
Now we are building a search query
var result = client.Search<ISearchDto>(s => s
.From(from)
.Size(PageSize)
.Types(lstTypes)
.Query(q => q.QueryString(qs => qs.Query(query)))
);
*lstTypes will have Type 1 and Type 2
Now how can I add the where clause for all Type 1 items with x=1 and for all Type 2 items with y=1 in NEST?
I hope the question is clear; any help on this will be highly appreciated.
You can query on the _type meta field in much the same way as you query on any other field. To perform different queries based on type within one search query, you can use a bool query with multiple clauses
client.Search<ISearchDto>(s => s
.From(from)
.Size(pageSize)
.Type(Types.Type(typeof(FirstSearchDto), typeof(SecondSearchDto)))
.Query(q => q
.Bool(b => b
.Should(sh => sh
.Bool(bb => bb
.Filter(
fi => fi.Term("_type", "firstSearchDto"),
fi => fi.Term(f => f.X, 1)
)
), sh => sh
.Bool(bb => bb
.Filter(
fi => fi.Term("_type", "secondSearchDto"),
fi => fi.Term(f => f.Y, 1)
)
)
)
)
)
);
We have a bool query with 2 should clauses; each should clause is a bool query with the conjunction of 2 filter clauses, one for _type and the other for the property to be queried for each type, respectively.
NEST supports operator overloading so this query can be written more succinctly with
client.Search<ISearchDto>(s => s
.From(from)
.Size(pageSize)
.Type(Types.Type(typeof(FirstSearchDto), typeof(SecondSearchDto)))
.Query(q => (+q
.Term("_type", "firstSearchDto") && +q
.Term(f => f.X, 1)) || (+q
.Term("_type", "secondSearchDto") && +q
.Term(f => f.Y, 1))
)
);
Both produce the following query
{
"from": 0,
"size": 20,
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"term": {
"_type": {
"value": "firstSearchDto"
}
}
},
{
"term": {
"x": {
"value": 1
}
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"_type": {
"value": "secondSearchDto"
}
}
},
{
"term": {
"y": {
"value": 1
}
}
}
]
}
}
]
}
}
}

elapsed + aggregate passing custom fields in Logstash

I am using the elapsed plugin to calculate time and then the aggregate plugin to display it.
I added custom fields to the elapsed filter.
You can see it below:
add_field => {
"status" => "Status"
"User" => "%{byUser}"
}
One is static, the other one is dynamic, coming with the event.
On the Logstash output it displays only the static value, not the dynamic one:
it displays %{byUser} for the dynamic one.
But the task id and status fields work just fine and I get the right values.
Any idea why?
A little bit more code:
elapsed {
unique_id_field => "assetId"
start_tag => "tag1:tag2"
end_tag => "tag3:tag4"
add_field => {
"wasInStatus" => "tag3"
"User" => "%{byUser}"
}
add_tag => ["CustomTag"]
}
grok input:
grok {
match => [
"message", "%{TIMESTAMP_ISO8601:timestamp} %{NUMBER:assetId} %{WORD:event}:%{WORD:event1} User:%{USERNAME:byUser}"]
if "CustomTag" in [tags] and "elapsed" in [tags] {
aggregate {
task_id => "%{assetId}"
code => "event.to_hash.merge!(map)"
map_action => "create_or_update"
}
}
The problem is connected with this elapsed filter option:
new_event_on_match => true/false
Changing new_event_on_match to false (it was true in my pipeline) fixed the issue, but I still wonder why.
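For reference, a minimal sketch of the fix (the same elapsed filter from my pipeline as above, only with new_event_on_match flipped to false):
elapsed {
  unique_id_field => "assetId"
  start_tag => "tag1:tag2"
  end_tag => "tag3:tag4"
  # keep the elapsed data on the matched end event instead of creating a new event
  new_event_on_match => false
  add_field => {
    "wasInStatus" => "tag3"
    "User" => "%{byUser}"
  }
  add_tag => ["CustomTag"]
}
My guess is that with new_event_on_match => true the add_field sprintf runs against the brand-new elapsed event, which has no byUser field, so the literal %{byUser} is kept; with false the fields are added to the matched end event, which still carries byUser from grok.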
I also faced a similar issue and found a fix for it. When new_event_on_match => true is used, the elapsed event is separated from the original log and a new elapsed event is written to Elasticsearch as below:
{
"_index": "elapsed_index_name",
"_type": "doc",
"_id": "DzO03mkBUePwPE-nv6I_",
"_version": 1,
"_score": null,
"_source": {
"execution_id": "dfiegfj3334fdsfsdweafe345435",
"elapsed_timestamp_start": "2019-03-19T15:18:34.218Z",
"tags": [
"elapsed",
"elapsed_match"
],
"#timestamp": "2019-04-02T15:39:40.142Z",
"host": "3f888b2ddeec",
"cus_code": "Custom_name", [This is a custom field]
"elapsed_time": 41.273,
"#version": "1"
},
"fields": {
"#timestamp": [
"2019-04-02T15:39:40.142Z"
],
"elapsed_timestamp_start": [
"2019-03-19T15:18:34.218Z"
]
},
"sort": [
1554219580142
]
}
For adding "cus_code" to the elapsed event object from the original log (the log where the elapsed filter's end tag is detected), I added an aggregate filter as below:
if "elapsed_end_tag" in [tags] {
aggregate {
task_id => "%{execution_id}"
code => "map['cus_code'] = event.get('custom_code_field_name')"
map_action => "create"
}
}
and I added the end block of the aggregation by validating the 'elapsed' tag:
if "elapsed" in [tags] {
aggregate {
task_id => "%{execution_id}"
code => "event.set('cus_code', map['cus_code'])"
map_action => "update"
end_of_task => true
timeout => 400
}
}
So, to add a custom field to the elapsed event, we need to combine the aggregate filter with the elapsed filter.
