Related
I have a document whose title is "Hard work & Success", and I need to be able to search for it. If I type "Hardwork" (without a space), the search doesn't return anything, but if I type "hard work", it returns the document.
this is the query I have used :
// Search term supplied by the caller.
const search = qObject.search;

// Search request body: one page of hits (from/size), restricted to the listed
// source fields, matched with a fuzzy multi_match over equally boosted fields.
const payload = {
  from: skip,
  size: limit,
  // Only these fields are returned for each hit.
  _source: [
    "id",
    "title",
    "thumbnailUrl",
    "youtubeUrl",
    "speaker",
    "standards",
    "topics",
    "schoolDetails",
    "uploadTime",
    "schoolName",
    "description",
    "studentDetails",
    "studentId",
  ],
  query: {
    bool: {
      must: {
        multi_match: {
          // Every searchable field carries the same "^2" boost suffix.
          fields: [
            "title",
            "standards.standard",
            "speaker",
            "schoolDetails.schoolName",
            "hashtags",
            "topics.topic",
            "studentDetails.studentName",
          ].map((field) => `${field}^2`),
          query: search,
          fuzziness: "AUTO",
        },
      },
    },
  },
};
if I searched for title "hard work" (included space)
then it returns data like this:
"searchResults": [
{
"_id": "92",
"_score": 19.04531,
"_source": {
"standards": {
"standard": "3",
"categoryType": "STANDARD",
"categoryId": "S3"
},
"schoolDetails": {
"categoryType": "SCHOOL",
"schoolId": "TPS123",
"schoolType": "PUBLIC",
"logo": "91748922mn8bo9krcx71.png",
"schoolName": "Carmel CMI Public School"
},
"studentDetails": {
"studentId": 270,
"studentDp": "164646972124244.jpg",
"studentName": "Nelvin",
"about": "good student"
},
"topics": {
"categoryType": "TOPIC",
"topic": "Motivation",
"categoryId": "MY"
},
"youtubeUrl": "https://www.youtube.com/watch?v=wermQ",
"speaker": "Anna Maria Siby",
"description": "How hardwork leads to success - motivational talk by Anna",
"id": 92,
"uploadTime": "2022-03-17T10:59:59.400Z",
"title": "Hard work & Success",
}
},
]
If I search for the keyword "Hardwork" (without a space), this document is not found. I need the search either to behave as if the keyword contained a space or to match related documents for the keyword. Is there a solution for this? Can you please help me out?
I made an example using a shingle analyzer.
Mapping:
{
"settings": {
"analysis": {
"filter": {
"shingle_filter": {
"type": "shingle",
"max_shingle_size": 4,
"min_shingle_size": 2,
"output_unigrams": "true",
"token_separator": ""
}
},
"analyzer": {
"shingle_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"shingle_filter"
]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "shingle_analyzer"
}
}
}
}
Now I tested it with your term. Note that the token "hardwork" was generated but the others were also generated which may be a problem for you.
GET idx-separator-words/_analyze
{
"analyzer": "shingle_analyzer",
"text": ["Hard work & Success"]
}
Results:
{
"tokens" : [
{
"token" : "hard",
"start_offset" : 0,
"end_offset" : 4,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "hardwork",
"start_offset" : 0,
"end_offset" : 9,
"type" : "shingle",
"position" : 0,
"positionLength" : 2
},
{
"token" : "hardworksuccess",
"start_offset" : 0,
"end_offset" : 19,
"type" : "shingle",
"position" : 0,
"positionLength" : 3
},
{
"token" : "work",
"start_offset" : 5,
"end_offset" : 9,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "worksuccess",
"start_offset" : 5,
"end_offset" : 19,
"type" : "shingle",
"position" : 1,
"positionLength" : 2
},
{
"token" : "success",
"start_offset" : 12,
"end_offset" : 19,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}
I have below document:
{
"_id": "61f7d5cfd0c32b744d3f81c2",
"_form": "61e66b8fd0c32b744d3e24a0",
"_workflow": "61e54fe2d0c32b744d3e0b7c",
"_appUser": "61e6b098d0c32b744d3e3808",
"sectionResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c3",
"name": "Project Details & Goals",
"order": 2,
"fieldResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c4",
"fieldType": "Text",
"name": "Project Name",
"value": "TRT",
"order": 0
},
{
"_id": "61f7d5cfd0c32b744d3f81c5",
"fieldType": "Number",
"name": "Amount Requested",
"value": "20",
"order": 1
},
{
"_id": "61f7d5cfd0c32b744d3f81c6",
"fieldType": "Number",
"name": "Project Cost",
"value": "50",
"order": 1
},
{
"_id": "61f7d5cfd0c32b744d3f81c7",
"fieldType": "Comment",
"name": "Project Goals",
"value": "TRT",
"order": 3
}
]
},
{
"_id": "61f7d5cfd0c32b744d3f81c8",
"name": "Section Heading",
"order": 2,
"fieldResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c9",
"fieldType": "Multiselectdropdown",
"name": "Multiselectdropdown",
"value": "Y",
"order": 0
},
{
"_id": "61f7d5cfd0c32b744d3f81ca",
"fieldType": "Image_Upload",
"name": "Image Upload",
"value": "Y",
"order": 1
}
]
}
],
"order": 2,
"status": "Reviewed",
"updatedAt": "2022-01-31T12:27:59.541Z",
"createdAt": "2022-01-31T12:27:59.541Z",
"__v": 0
}
Inside the document, there is a sectionResponse which contains response of multiple sections. Inside this, there is a fieldResponse which contains the name and value. I have to extract the value from all the documents where name is Amount Requested.
How can I write a query for such a situation?
Here is a solution that returns only matching material and requires no $unwind.
db.foo.aggregate([
// This stage alone is enough to give you the basic info.
// You will get not only the doc _id but also an array of arrays
// (fieldResponse within sectionResponse) containing the whole
// fieldResponse doc. It is slightly awkward, but if you need structural data
// other than *just* the value, it is a good start:
{$project: {
// The outer $filter removes inner-filter results that are empty arrays,
// i.e. sections in which no "Amount Requested" was found.
XX: {$filter: {input:
// $map evaluates the inner $filter once per sectionResponse element ($$z1).
{$map: {
input: "$sectionResponse", as: "z1", in:
// The inner $filter keeps only fieldResponse entries whose name is "Amount Requested".
// NOTE: it reuses the variable name "z1"; inside cond, $$z1 is the
// fieldResponse element and shadows the $map variable of the same name.
{$filter: {input: "$$z1.fieldResponse",
as: "z1",
cond: {$eq:["$$z1.name","Amount Requested"]}
}}
}},
as: "z2",
cond: {$ne: ["$$z2", [] ]}
}}
}}
which yields (given a slightly expanded input set where subdocs were copied but the value and order changed for clarity):
{
"_id" : 0,
"XX" : [
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "20",
"order" : 1
},
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "77",
"order" : 18
}
],
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "99",
"order" : 818
}
]
]
}
{
"_id" : 1,
"XX" : [
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "333",
"order" : 1
}
]
]
}
{ "_id" : 2, "XX" : [ ] }
If you don't want to know about top level docs that contained
NO fieldResponses where name = "Amount Requested" then append this stage:
{$match: {XX: {$ne: [] }}}
Finally, if you really want just the values, append this reduce stage:
,{$addFields: {XX: {$reduce: {
input: "$XX",
initialValue: [],
in: {$concatArrays: ["$$value",
{$map: {input: "$$this",
as:"z",
in: "$$z.value"
}} ] }
}}
}}
which yields:
{ "_id" : 0, "XX" : [ "20", "77", "99" ] }
{ "_id" : 1, "XX" : [ "333" ] }
If you want a little more than just value(like order for example) then have $map return a doc instead of a scalar, e.g.:
{$map: {input: "$$this",
as:"z",
in: {v:"$$z.value",o:"$$z.order"}
}} ] }
to yield:
{
"_id" : 0,
"XX" : [
{
"v" : "20",
"o" : 1
},
{
"v" : "77",
"o" : 18
},
{
"v" : "99",
"o" : 818
}
]
}
{ "_id" : 1, "XX" : [ { "v" : "333", "o" : 1 } ] }
Again, the input set provided by the OP was expanded with additional {name:"Amount Requested"} subdocs tossed into different sectionResponse arrays to generate a more complex structure.
Maybe something like this, which you can easily adapt to Python, assuming you need only the value from the sectionResponse.$[].fieldResponse.$[] elements that have the name "Amount Requested":
db.collection.aggregate([
  // 1. Keep only documents holding at least one field named "Amount Requested".
  { $match: { "sectionResponse.fieldResponse.name": "Amount Requested" } },

  // 2. Rebuild each section as { _id, fieldResponse } with only the matching
  //    fields, then drop the sections whose fieldResponse ended up empty.
  {
    $project: {
      sectionResponse: {
        $filter: {
          input: {
            $map: {
              input: "$sectionResponse",
              as: "section",
              in: {
                _id: "$$section._id",
                fieldResponse: {
                  $filter: {
                    input: "$$section.fieldResponse",
                    as: "field",
                    cond: { $eq: ["$$field.name", "Amount Requested"] },
                  },
                },
              },
            },
          },
          as: "trimmed",
          cond: { $gt: [{ $size: "$$trimmed.fieldResponse" }, 0] },
        },
      },
    },
  },

  // 3. One output document per remaining section ...
  { $unwind: "$sectionResponse" },
  // 4. ... and then one per matching field inside it.
  { $unwind: "$sectionResponse.fieldResponse" },

  // 5. Expose only the field's value.
  { $project: { value: "$sectionResponse.fieldResponse.value" } },
]);
Match the documents containing at least one element with sectionResponse.fieldResponse.name:"Amount Requested"
project/map all sectionResponse.fieldResponse elements containing name="Amount Requested" ( non empty elements only )
unwind the sectionResponse array
unwind the fieldResponse array
project only the value field.
playground
For best results index on "sectionResponse.fieldResponse.name" need to be added.
I have written a Python program that requests JSON data from an API. The catch is that this JSON itself contains further request URLs for fetching extra data about each object.
import requests
import json
import sys
import os
import geojson
# Request the work-assignment list, passing CRS=Lambert72 as a query parameter.
response = requests.get(
    "http://api.gipod.vlaanderen.be/ws/v1/workassignment",
    params={"CRS": "Lambert72"},
)
print(response.status_code)

# Re-serialise the decoded payload with sorted keys and indentation for readability.
text = json.dumps(response.json(), sort_keys=True, indent=4)
print(text)

# A context manager guarantees the file is flushed and closed, even if the
# write fails; the previous version never closed the handle.
with open("text.json", "wt") as f:
    f.write(text)
print(os.getcwd())
JSON from request, the other request URLs including parameters is in the detail column.
[
{
"gipodId": 103246,
"owner": "Eandis Leuven",
"description": ", , ZAVELSTRAAT: E Nieuw distributienet (1214m)",
"startDateTime": "2007-12-03T06:00:00",
"endDateTime": "2014-01-06T19:00:00",
"importantHindrance": false,
"coordinate": {
"coordinates": [
4.697028256276443,
50.896894135898485
],
"type": "Point",
"crs": {
"type": "name",
"properties": {
"name": "urn:ogc:def:crs:OGC:1.3:CRS84"
}
}
},
**"detail": http://api.gipod.vlaanderen.be/ws/v1/workassignment/103246?crs=4326,
"cities": ["Leuven"]**
}
],
"latestUpdate": "2016-11-16T11:32:39.253"
}
The first request just gets the points (each unique with a certain id), while the second request gets the "details data" which also has polygon data and multiline.
Get Url:
http://api.gipod.vlaanderen.be/ws/v1/workassignment/[id]
{ "comment" : null,
"contactDetails" : { "city" : "Leuven",
"country" : "België",
"email" : null,
"extraAddressInfo" : null,
"firstName" : null,
"lastName" : null,
"number" : "58",
"organisation" : "Eandis Leuven",
"phoneNumber1" : "078/35.35.34",
"phoneNumber2" : null,
"postalCode" : "3012",
"street" : "Aarschotsesteenweg"
},
"contractor" : null,
"mainContractor" : null,
"description" : ", , ZAVELSTRAAT: E Nieuw distributienet (1214m)",
"diversions" : [
{
"gipodId": 1348152,
"reference": "IOW-TERRAS-2013-01-Z",
"description": "Horecaterras op parkeerstrook (Lierbaan 12)",
"comment": null,
"geometry": {
"geometries": [
{
"coordinates": [[[3.212947654779088, 51.175784679668915],
[3.2151308569159482, 51.17366647833133],
[3.216112818368467, 51.17328051591839],
[3.2186926906668876, 51.173044950954456],
[3.2204789191276944, 51.173098278776514],
[3.221602856602255, 51.173333934695286]]],
"type": "MultiLineString",
"crs": null
}
],
"type": "GeometryCollection",
"crs": {
"type": "name",
"properties": {
"name": "urn:ogc:def:crs:OGC:1.3:CRS84"
}
}
},
"periods": [{"startDateTime": "2013-04-09T00:00:00","endDateTime": "2013-10-31T00:00:00"}],
"recurrencePattern": null,
"latestUpdate": "2014-01-24T10:23:08.917",
"streets": null,
"diversionTypes": null,
"diversionDirection":
{
"type": 0,
"description": "Beide"
},
"status": "Vergund",
"contactDetails": {
"organisation": "Café Real",
"lastName": "Vets",
"firstName": "Peggy",
"phoneNumber1": null,
"phoneNumber2": null,
"email": "peggy.vets#skynet.be",
"street": "Lierbaan",
"number": "12",
"postalCode": "2580",
"city": "Putte",
"country": "België",
"extraAddressInfo": null
}
"url": null,
}
],
"endDateTime" : "2014-01-06T19:00:00",
"gipodId" : 103246,
"hindrance" : { "description" : null,
"direction" : null,
"effects" : [ "Fietsers hebben doorgang",
"Handelaars bereikbaar",
"Verminderde doorstroming in 1 richting",
"Voetgangers op de rijweg",
"Voetgangers hebben doorgang"
],
"important" : false,
"locations" : [ "Voetpad" ]
},
"latestUpdate" : "2013-06-18T03:43:28.17",
"location" : { "cities" : [ "Leuven" ],
"coordinate" : { "coordinates" : [ 4.697028256276443,
50.896894135898485
],
"crs" : { "properties" : { "name" : "urn:ogc:def:crs:OGC:1.3:CRS84" },
"type" : "name"
},
"type" : "Point"
},
"geometry" : { "coordinates" : [ [ [ [ 4.699934331336474,
50.90431808607037
],
[ 4.699948535632464,
50.90431829749237
],
[ 4.699938837004092,
50.90458139231922
],
[ 4.6999246328435396,
50.90458118062111
],
[ 4.699934331336474,
50.90431808607037
]
] ]
],
"crs" : { "properties" : { "name" : "urn:ogc:def:crs:OGC:1.3:CRS84" },
"type" : "name"
},
"type" : "MultiPolygon"
}
},
"owner" : "Eandis Leuven",
"reference" : "171577",
"startDateTime" : "2007-12-03T06:00:00",
"state" : "In uitvoering",
"type" : "Werken aan nutsleiding",
"url" : "http://www.eandis.be"
}
Now here is the deal: this request has to be repeated for each object I get from the first API request, and there can be over one hundred objects. So logic dictates this has to happen in a loop, though I am not sure how to get started.
You can make use of functions in this case.
Your first function can simply fetch the list of the points. Your second function can simply fetch the data of details.
def fetch_details(url: str):
    """Request the detail record behind ``url`` and return its decoded JSON.

    Raises ``requests.HTTPError`` if the server answers with an error status.
    """
    response = requests.get(url)
    response.raise_for_status()
    # any other processing of the detail record goes here
    return response.json()


def fetch_points(url: str):
    """Request the list of points from ``url`` and fetch the details of each.

    Returns a list containing one decoded detail record for every point that
    carries a "detail" URL. Raises ``requests.HTTPError`` if any request is
    answered with an error status.
    """
    response = requests.get(url)
    response.raise_for_status()
    details = []
    for obj in response.json():
        detail_url = obj.get("detail")
        # Skip entries without a detail link instead of requesting None.
        if detail_url:
            details.append(fetch_details(detail_url))
    return details


# requests needs an explicit scheme; without "http://" it raises MissingSchema.
api_url = "http://api.gipod.vlaanderen.be/ws/v1/workassignment"
fetch_points(api_url)
I'm migrating data from Mongo to Arango and I need to reproduce a $group aggregation. I have successfully reproduced the results but I'm concerned that my approach maybe sub-optimal. Can the AQL be improved?
I have a collection of data that looks like this:
{
"_id" : ObjectId("5b17f9d85b2c1998598f054e"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b1808145b2c1998598f054f"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b18083c5b2c1998598f0550"),
"department" : "Development",
"region" : "Europe"
}
{
"_id" : ObjectId("5b1809a75b2c1998598f0551"),
"department" : "Sales"
}
Note the value can be a string, Array or not present
In Mongo I'm using the following code to aggregate the data:
db.test.aggregate([
  // Expand array-valued "department" into one document per element; documents
  // where the field is null, missing or an empty array are kept.
  { $unwind: { path: "$department", preserveNullAndEmptyArrays: true } },

  // Same expansion for "region".
  { $unwind: { path: "$region", preserveNullAndEmptyArrays: true } },

  // Count the documents per (department, region) pair, using the string
  // "null" as the group key wherever a value is absent.
  {
    $group: {
      _id: {
        department: { $ifNull: ["$department", "null"] },
        region: { $ifNull: ["$region", "null"] },
      },
      count: { $sum: 1 },
    },
  },
]);
In Arango I'm using the following AQL:
// Count the documents of "test" per (department, region) combination.
FOR i IN test
// NOT_NULL substitutes the string "null" when the attribute is absent;
// APPEND onto an empty array turns a plain string value into an array.
LET FIELD1=(FOR a IN APPEND([],NOT_NULL(i.department,"null")) RETURN a)
LET FIELD2=(FOR a IN APPEND([],NOT_NULL(i.region,"null")) RETURN a)
// Visit every department/region pair of the current document.
FOR f1 IN FIELD1
FOR f2 IN FIELD2
// Group on the pair and count how many rows fall into each group.
COLLECT id={department:f1,region:f2} WITH COUNT INTO counter
RETURN {_id:id,count:counter}
Edit:
The APPEND is used to convert string values into an Array
Both produce results that look like this;
{
"_id" : {
"department" : "Marketing",
"region" : "US"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Development",
"region" : "Europe"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Sales",
"region" : "null"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Marketing",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "US"
},
"count" : 2.0
}
Your approach seems alright. I would suggest to use TO_ARRAY() instead of APPEND() to make it easier to understand though.
Both functions skip null values, thus it is unavoidable to provide some placeholder, or test for null explicitly and return an array with a null value (or whatever works best for you):
// Count the documents of "test" per (department, region) combination.
FOR doc IN test
// An absent attribute is replaced by [ null ] so the document still yields a
// row; otherwise TO_ARRAY lets string and array values be iterated alike.
FOR field1 IN doc.department == null ? [ null ] : TO_ARRAY(doc.department)
FOR field2 IN doc.region == null ? [ null ] : TO_ARRAY(doc.region)
// Group on the pair and count the rows in each group.
COLLECT department = field1, region = field2
WITH COUNT INTO count
RETURN { _id: { department, region }, count }
Collection test:
[
{
"_key": "5b17f9d85b2c1998598f054e",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b18083c5b2c1998598f0550",
"department": "Development",
"region": "Europe"
},
{
"_key": "5b1808145b2c1998598f054f",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b1809a75b2c1998598f0551",
"department": "Sales"
}
]
Result:
[
{
"_id": {
"department": "Development",
"region": "Europe"
},
"count": 1
},
{
"_id": {
"department": "Marketing",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Marketing",
"region": "US"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": null
},
"count": 1
},
{
"_id": {
"department": "Sales",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": "US"
},
"count": 2
}
]
I'm quite new to MongoDB and Mongoose. I don't know if my query is working, but when I add a GeoJSON condition to my code it returns null.
My only goal is to filter my data by city, state and country and also to search for nearby places. It would be a really great help if someone could help me. Thanks.
// Escape regex metacharacters so the user-supplied city is matched literally
// rather than interpreted as a pattern.
const escapedCity = String(req.body.city).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

// Businesses in the requested city/state/country that also lie near the given
// point. $nearSphere needs a geospatial index on `loc`; with a GeoJSON point,
// $maxDistance is expressed in meters.
const query = {
  $and: [
    // Case-insensitive exact match on the city name.
    { city: new RegExp(`^${escapedCity}$`, "i") },
    { state: req.body.state },
    { country: req.body.country },
    {
      loc: {
        $nearSphere: {
          $geometry: {
            type: "Point",
            coordinates: [-117.16108380000003, 32.715738],
          },
          $maxDistance: 100,
        },
      },
    },
  ], // the original snippet never closed this array or the object below
};

Business.find(query)
  .populate("deal_id")
  .sort({ business_type: -1, deal_id: -1 })
  .exec((err, businesses) => {
    if (err) {
      // Report the failure instead of silently answering with an empty body.
      // NOTE(review): assumes an Express-style `res` with status(); confirm.
      res.status(500).json({ error: err.message });
      return;
    }
    res.json(businesses);
  });
I don't know if I'm doing it right; here's my sample data:
[
{
"_id": "5a0b1f489929442c36fd5c83",
"business_row": 29160,
"created_at": "2017-11-14T16:52:10.130Z",
"owner_name": "David Lui",
"company_website": "",
"phone_number": "604-273-3288",
"contact_name": "David Lui",
"zip_postal": "V6X 3Z9",
"state": "British Columbia",
"country": "Canada",
"city": "Richmond",
"address": "3779 Sexsmith Rd # 2172 Richmond British Columbia",
"company_name": "Aem Seafood",
"__v": 1,
"slug": "Aem-Seafood&Richmond",
"loc": {
"coordinates": [
"-123.129488",
"49.185359"
],
"type": "Point"
},
"deal_id": [],
"is_favorite": false,
"is_draft": false,
"has_featured": false,
"owner_id": [
"5a0adcf9f7205f0004535def"
],
"files": [],
"operations": [],
"sub_category": [],
"category_options": [
{
"value": "5a0b186b9f3a4a2710075654",
"sub_cat": {
"value": "59f6d13d00086a6e645c50a4",
"label": "Meat And Fish Markets"
}
}
],
"category_id": [
"5a0b186b9f3a4a2710075654"
],
"business_type_name": "Free",
"business_type": "0",
"user_id": [
"5a0adcf9f7205f0004535def"
]
}
]
It turns out I don't need to query the city, state and country for this; I can use $geoWithin instead.