How can I fix out of memory issue in Kusto query? - azure

I have a memory usage issue in an Azure dashboard. The query below runs well and produces the expected result, but from time to time it throws a "memory usage exceeded" error.
// Requests joined (full outer) with the distinct exceptions of the same operation.
// Memory notes:
//   * Only the columns that the join key or the final projection need are carried into
//     the join. The original also computed Environment, Data, OperationName and Category
//     and projected resultCode/MyCustomer, none of which reach the final output.
//   * The intermediate sort of the whole join result was removed; the result is sorted
//     exactly once, at the very end.
let Requests = requests
| extend OrderID = tostring(customDimensions.subject),
         MyCustomer = trim_end('/', tostring(split(customDimensions.source, '//')[1]))
| where OrderID != "" and MyCustomer !contains "eurotracs"
| project success, state = tostring(customDimensions.type), ProcessingDate = timestamp, OrderID, operation_Id
| join kind=fullouter (exceptions
    | extend Method = innermostMethod, ReasonMessage = innermostMessage
    // Method still takes part in the distinct so the number of rows per operation is unchanged.
    | distinct ReasonMessage, Method, operation_Id, method
    | project ReasonMessage, operation_Id, method)
  on operation_Id;
Requests
// Last dot-separated segment of the exception's method, computed once and reused below.
| extend ProcessingStateDetail = split(method, ".")[-1]
| extend
    Test1_OK = iif(ProcessingStateDetail contains "Test1", "NO", "YES"),
    Test2_OK = iif(ProcessingStateDetail contains "Test2", "NO", "YES"),
    Test3_OK = iif(ProcessingStateDetail !contains "Test1" and ProcessingStateDetail !contains "Test2", "NO", "YES")
| project ProcessingDate, OrderID, success, Test1_OK, Test2_OK, Test3_OK, ProcessingStateDetail, ReasonMessage, state
// Single sort. OrderID desc is the tie-breaker that the removed intermediate sort provided.
| order by ProcessingDate desc, OrderID desc

Related

Azure AppInsights query union

I've been writing some queries against AppInsights and noticed that in my data there's 2 ways of determining if a username exists against the telemetry.
customEvents
| where tostring(parse_json(tostring(customDimensions)).username) != '' or tostring(parse_json(tostring(customDimensions.Properties)).username) != ''
| project
Username = tostring(parse_json(tostring(customDimensions)).username),
timestamp = timestamp
| distinct Username, bin(timestamp, 1d)
| summarize count() by bin(timestamp, 1d)
| render timechart
I'm a bit stuck. Notice that the first where clause has two ways of determining whether a record is valid. How do I change the projection to say "if the username is here, take it from here; otherwise look in customDimensions.Properties"?
I assume we need a union from somewhere?
you could use the coalesce() function:
datatable(customDimensions: string)
[
'{"username": "user1"}',
'{"Properties": {"username": "user2"}}'
]
| where customDimensions has 'username'
| extend cd = parse_json(customDimensions)
| project UserName = tostring(coalesce(cd.username, cd.Properties.username))
| where isnotempty(UserName)
UserName
user1
user2

Find logs of POD in AKS using Log Analytics Query

There is an AKS cluster running that is connected to Log Analytics in Azure.
I'm trying to view logs of named PODs using the following query snippet:
// KubePodLogs: returns ContainerLog rows for containers of pods in cluster `clustername`
// whose pod name starts with `podnameprefix` followed by "-", joined with pod details
// taken from KubePodInventory.
let KubePodLogs = (clustername:string, podnameprefix:string) {
// Distinct container IDs of the matching pods; rows without a ContainerID are skipped.
let ContainerIdList = KubePodInventory
| where ClusterName =~ clustername
| where Name startswith strcat(podnameprefix, "-")
| where strlen(ContainerID)>0
| distinct ContainerID;
ContainerLog
| where ContainerID in (ContainerIdList)
// NOTE(review): no join kind is given, so the Kusto default applies. Per the join operator
// documentation the default is innerunique, which de-duplicates the LEFT side on the join
// key, so many ContainerLog rows sharing one ContainerID would collapse. That would explain
// log rows going missing here. TODO confirm, e.g. with kind=inner or a lookup.
| join (KubePodInventory | project ContainerID, Name, PodLabel, Namespace, Computer) on ContainerID
// NOTE(review): Name1 is presumably the Name column of the right-hand (KubePodInventory)
// side, renamed by the join because the left side already has a Name column — verify.
| project TimeGenerated, Node=Computer, Namespace, PodName=Name1, PodLabel, ContainerID, LogEntry
};
KubePodLogs('aks-my-cluster', 'my-service') | order by TimeGenerated desc
The above query does return rows of the matching PODs but not all that are actually available.
Trying to get results of the partial queries by inspecting POD details:
KubePodInventory
| where ClusterName =~ 'aks-my-cluster'
| where Name startswith 'my-service-'
| where strlen(ContainerID)>0
| distinct ContainerID;
gives me a container-id. Now feeding this container-id into another query shows more
results than the combined query from above. Why?
ContainerLog
| where ContainerID == "aec001...fc31"
| order by TimeGenerated desc
| project TimeGenerated, ContainerID, LogEntry
One thing I noticed is that the latter, simpler query's results contain log entries whose LogEntry field was parsed from the JSON-formatted output of the POD. In the results I can expand LogEntry into more fields corresponding to the original JSON data of that POD's log output.
I.e. it seems like the combined query ( with a join ) skips those JSON LogEntry ContainerLog entries, but why ?
As far as I can see the combined query doesn't filter in any way on the LogEntry field.
A changed query seems to produce the results I would expect:
I exchanged the join with a lookup and used more columns to distinct the KubePodInventory results.
// KubePodLogs(clustername, podnameprefix): log rows of every container that belongs to a
// pod named "<podnameprefix>-..." in the given cluster, with the pod's inventory columns
// attached through a left-outer lookup on ContainerID.
let KubePodLogs = (clustername:string, podnameprefix:string) {
    // Inventory rows of the matching pods; ContainerID is the first column, so the same
    // table also serves as the value list for the `in` filter below.
    let PodContainers =
        KubePodInventory
        | where ClusterName =~ clustername and Name startswith strcat(podnameprefix, "-")
        | where strlen(ContainerID) > 0
        | distinct ContainerID, PodLabel, Namespace, PodIp, Name;
    ContainerLog
    | where ContainerID in (PodContainers)
    | lookup kind=leftouter (PodContainers) on ContainerID
    // Drop the log table's own Name column (among others) and expose the looked-up
    // Name1 column as PodName.
    | project-away Image, ImageTag, Repository, Name, TimeOfCommand
    | project-rename PodName = Name1
};
KubePodLogs('aks-my-cluster', 'my-service')
| order by TimeGenerated desc

How do I access outer column in subquery in kusto / Azure application insights?

I am trying to simply run a subquery in Azure application insights, using Kusto, so that I can get some information from two tables displayed as one.
The query I'm trying is
table1
| extend progressLog = toscalar(
table2
| where common_Id == table1.common_Id // errors saying Ensure that expression: table1.common_Id is indeed a simple name
| summarize makelist(stringColumn)
)
I have attempted to alias this id, and even join the two tables, as such:
requests
| extend aliased_id = common_Id
| join traces on operation_Id, $left.operation_Id == $right.operation_Id
| extend test_id = operation_Id
| extend progressLog = toscalar(
traces
| where operation_Id == aliased_id // Failed to resolve column or scalar expression named 'aliased_id'
| summarize makelist(message)
)
Failed to resolve column or scalar expression named 'aliased_id'.
I am simply trying to do the equivalent of the T-SQL query:
SELECT
... ,
STRING_AGG(table2.stringColumn, ',')
FROM
table1
INNER JOIN
table2
ON table1.common_Id = table2.common_Id
GROUP BY
table.<props>
My main question is - how do I reference "common_Id" in the kusto language inside a subquery
Please see if the next query provides what you're looking for. If not, please share sample input using datatable, as I did below, and expected output:
// Sample input: one row per request.
let requests = datatable(common_Id:string, operation_Id:string)
[
    "A", "X",
    "B", "Y",
    "C", "Z"
];
// Sample input: zero or more trace messages per operation.
let traces = datatable(operation_Id:string, message:string)
[
    "X", "m1",
    "X", "m2",
    "Y", "m3"
];
// One row per operation_Id holding all of its messages as an array.
// make_list() is the current name of the deprecated makelist(); the result column is
// named explicitly instead of relying on the auto-generated "list_message".
let messagesByOperationId = traces
    | summarize progressLog = make_list(message) by operation_Id;
requests
// leftouter keeps requests without any trace (operation "Z" here); progressLog is empty for them.
| join kind=leftouter messagesByOperationId on operation_Id
| project common_Id, operation_Id, progressLog

Show first entry by timestamp of each event group

we collect custom events in application insights for each message a user sends to a chatbot. The event is called user_message.
We use a custom dimension field customDimensions.conversationid to know which message is related to which conversation.
I want to see the first message of each conversation so basically the "oldest" timestamp of each event based on the conversation id.
I tried to work with arg_max but I didn't figure out how it works.
customEvents
| extend itemType = iif(itemType == 'customEvent',itemType,"")
| where (itemType == 'customEvent')
| where name == 'User_Message'
I was able to show all user messages ordered by the conversation ID; however, it shows me multiple lines and I only need the first message per conversation.
Datamodel:
timestamp [UTC] 2019-04-05T13:24:10.359Z
name User_Message
itemType customEvent
customDimensions
confidence N/A
conversationId BNu0SqC5RfA1S0lZmdxxxxx
intent N/A
userMessage user text
operation_Name POST /api/messages
operation_Id xxxxxxxa5d422eadebfebb2
operation_ParentId xxxxx545a5d422eadebfebb2.99811380_13.f033f887_
application_Version 1.0.0
client_Type PC
client_OS Windows_NT 10.0.14393
client_IP 0.0.0.0
client_City Amsterdam
client_StateOrProvince North Holland
client_CountryOrRegion Netherlands
cloud_RoleName Web
cloud_RoleInstance XXXXXXXFF74D594
appId ccccccc-8b24-41bb-a02a-1cb101da84e5
appName bot-XXXXX
iKey XXXXXX
sdkVersion node:XX
itemId XXXXXXXX-57a6-11e9-a5a7-ebc91e7cf64e
itemCount 1
SOLUTION
customEvents
| extend itemType = iif(itemType == 'customEvent',itemType,"")
| where (itemType == 'customEvent')
| where (name=='User_Message')
| summarize list=makeset(customDimensions.userMessage) by
tostring(customDimensions.conversationId)
| mv-expand firstMessage=list[0]
Update:
// First User_Message of every conversation.
// Step 1: earliest timestamp per conversation id.
// The id is read with plain property access; the key is spelled conversationId as in the
// data model (dynamic property names are case-sensitive). The previous unquoted
// customDimensions.[conversationID] form is resolved as a column reference, not a key.
customEvents
| where name == "User_Message"
| summarize timestamp=min(timestamp) by myconid=tostring(customDimensions.conversationId)
// Step 2: join back on (conversation id, timestamp) to recover the full event row.
| join kind= inner (
customEvents
| where name == "User_Message"
| extend myconid = tostring(customDimensions.conversationId)
) on myconid,timestamp
You can use inner join to do that.
I don't have your data, so in your case the code would look like the query below (you may need to make a few changes):
customEvents
| summarize timestamp=min(timestamp) by conversationID
| join kind= inner (
customEvents
) on conversationID,timestamp
| project-away conversationID1,timestamp1
Please let me know if you have more issues.

Search Query should contain 'AggregatedValue' and 'bin(timestamp, [roundTo])' for Metric alert type

I'm trying to create a custom metric alert based on some metrics in my Application Insights logs. Below is the query I'm using;
let start = customEvents
| where customDimensions.configName == "configName"
| where name == "name"
| extend timestamp, correlationId = tostring(customDimensions.correlationId), configName = tostring(customDimensions.configName);
// "Ending" events: events named "anotherName" for task "taskName" of the same config.
// nameTimeStamp carries this side's timestamp under a distinct name so that it can be
// subtracted from the start event's timestamp after the join on correlationId.
let ending = customEvents
// Fixed: the string literal was missing its opening quote (== configName").
| where customDimensions.configName == "configName"
| where name == "anotherName"
| where customDimensions.taskName == "taskName"
| extend timestamp, correlationId = tostring(customDimensions.correlationId), configName = tostring(customDimensions.configName), name= name, nameTimeStamp= timestamp ;
let timeDiffs = start
| join (ending) on correlationId
| extend timeDiff = nameTimeStamp- timestamp
| project timeDiff, timestamp, nameTimeStamp, name, anotherName, correlationId;
timeDiffs
| summarize AggregatedValue=avg(timeDiff) by bin(timestamp, 1m)
When I run this query in Analytics page, I get results, however when I try to create a custom metric alert, I got the error Search Query should contain 'AggregatedValue' and 'bin(timestamp, [roundTo])' for Metric alert type
The only response I found was adding AggregatedValue which I already have, I'm not sure why custom metric alert page is giving me this error.
I found what was wrong with my query. Essentially, aggregated value needs to be numeric, however AggregatedValue=avg(timeDiff) produces time value, but it was in seconds, so it was a bit hard to notice. Converting it to int solves the problem,
I have just updated last bit as follows
timeDiffs
| summarize AggregatedValue=toint(avg(timeDiff)/time(1ms)) by bin(timestamp, 5m)
This brings another challenge on Aggregate On while creating the alert as AggregatedValue is not part of the grouping that is coming after by statement.

Resources