I have made a Python program that requests JSON data from an API. Now here is the thing though: this JSON actually contains further request URLs for fetching extra data about each object.
import os
import json
import requests

response = requests.get(
    "http://api.gipod.vlaanderen.be/ws/v1/workassignment",
    params={"CRS": "Lambert72"},
)
print(response.status_code)

# Pretty-print the JSON payload and save it to disk
text = json.dumps(response.json(), sort_keys=True, indent=4)
print(text)
with open("text.json", "wt") as f:
    f.write(text)
print(os.getcwd())
JSON from the first request; the follow-up request URL, parameters included, is in the "detail" field.
[
    {
        "gipodId": 103246,
        "owner": "Eandis Leuven",
        "description": ", , ZAVELSTRAAT: E Nieuw distributienet (1214m)",
        "startDateTime": "2007-12-03T06:00:00",
        "endDateTime": "2014-01-06T19:00:00",
        "importantHindrance": false,
        "coordinate": {
            "coordinates": [
                4.697028256276443,
                50.896894135898485
            ],
            "type": "Point",
            "crs": {
                "type": "name",
                "properties": {
                    "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
                }
            }
        },
        "detail": "http://api.gipod.vlaanderen.be/ws/v1/workassignment/103246?crs=4326",
        "cities": ["Leuven"],
        "latestUpdate": "2016-11-16T11:32:39.253"
    }
]
The first request just gets the points (each unique with a certain id), while the second request gets the "details" data, which also contains polygon and multiline geometry.
GET URL:
http://api.gipod.vlaanderen.be/ws/v1/workassignment/[id]
{ "comment" : null,
"contactDetails" : { "city" : "Leuven",
"country" : "België",
"email" : null,
"extraAddressInfo" : null,
"firstName" : null,
"lastName" : null,
"number" : "58",
"organisation" : "Eandis Leuven",
"phoneNumber1" : "078/35.35.34",
"phoneNumber2" : null,
"postalCode" : "3012",
"street" : "Aarschotsesteenweg"
},
"contractor" : null,
"mainContractor" : null,
"description" : ", , ZAVELSTRAAT: E Nieuw distributienet (1214m)",
"diversions" : [
{
"gipodId": 1348152,
"reference": "IOW-TERRAS-2013-01-Z",
"description": "Horecaterras op parkeerstrook (Lierbaan 12)",
"comment": null,
"geometry": {
"geometries": [
{
"coordinates": [[[3.212947654779088, 51.175784679668915],
[3.2151308569159482, 51.17366647833133],
[3.216112818368467, 51.17328051591839],
[3.2186926906668876, 51.173044950954456],
[3.2204789191276944, 51.173098278776514],
[3.221602856602255, 51.173333934695286]]],
"type": "MultiLineString",
"crs": null
}
],
"type": "GeometryCollection",
"crs": {
"type": "name",
"properties": {
"name": "urn:ogc:def:crs:OGC:1.3:CRS84"
}
}
},
"periods": [{"startDateTime": "2013-04-09T00:00:00","endDateTime": "2013-10-31T00:00:00"}],
"recurrencePattern": null,
"latestUpdate": "2014-01-24T10:23:08.917",
"streets": null,
"diversionTypes": null,
"diversionDirection":
{
"type": 0,
"description": "Beide"
},
"status": "Vergund",
"contactDetails": {
"organisation": "Café Real",
"lastName": "Vets",
"firstName": "Peggy",
"phoneNumber1": null,
"phoneNumber2": null,
"email": "peggy.vets#skynet.be",
"street": "Lierbaan",
"number": "12",
"postalCode": "2580",
"city": "Putte",
"country": "België",
"extraAddressInfo": null
},
"url": null
}
],
"endDateTime" : "2014-01-06T19:00:00",
"gipodId" : 103246,
"hindrance" : { "description" : null,
"direction" : null,
"effects" : [ "Fietsers hebben doorgang",
"Handelaars bereikbaar",
"Verminderde doorstroming in 1 richting",
"Voetgangers op de rijweg",
"Voetgangers hebben doorgang"
],
"important" : false,
"locations" : [ "Voetpad" ]
},
"latestUpdate" : "2013-06-18T03:43:28.17",
"location" : { "cities" : [ "Leuven" ],
"coordinate" : { "coordinates" : [ 4.697028256276443,
50.896894135898485
],
"crs" : { "properties" : { "name" : "urn:ogc:def:crs:OGC:1.3:CRS84" },
"type" : "name"
},
"type" : "Point"
},
"geometry" : { "coordinates" : [ [ [ [ 4.699934331336474,
50.90431808607037
],
[ 4.699948535632464,
50.90431829749237
],
[ 4.699938837004092,
50.90458139231922
],
[ 4.6999246328435396,
50.90458118062111
],
[ 4.699934331336474,
50.90431808607037
]
] ]
],
"crs" : { "properties" : { "name" : "urn:ogc:def:crs:OGC:1.3:CRS84" },
"type" : "name"
},
"type" : "MultiPolygon"
}
},
"owner" : "Eandis Leuven",
"reference" : "171577",
"startDateTime" : "2007-12-03T06:00:00",
"state" : "In uitvoering",
"type" : "Werken aan nutsleiding",
"url" : "http://www.eandis.be"
}
Now here is the deal: this request has to be repeated for each object I get from the first API request, and that can be over one hundred objects. So logic dictates this has to happen in a loop, though figuring out how to start is a bit troublesome.
You can make use of functions in this case. Your first function can simply fetch the list of points; your second function can fetch the detail data for each one.
import requests

def fetch_details(url: str):
    """Make a request to get the detail data of a single object."""
    response = requests.get(url)
    # any other processing
    return response.json()

def fetch_points(url: str):
    """Fetch the list of points and follow each object's "detail" URL."""
    response = requests.get(url)
    for obj in response.json():
        details = fetch_details(obj.get("detail"))
        # do something with details here

api_url = "http://api.gipod.vlaanderen.be/ws/v1/workassignment"
fetch_points(api_url)
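Since there can be over a hundred detail calls, it also pays to reuse a single HTTP connection. Here is a sketch of the same idea using requests.Session; the raise_for_status() checks and the collected all_details list are my additions, not anything the API requires:

import requests

def fetch_all(list_url: str):
    """Fetch every point, then follow each object's "detail" URL."""
    all_details = []
    with requests.Session() as session:  # one connection reused for all calls
        response = session.get(list_url, params={"CRS": "Lambert72"})
        response.raise_for_status()
        for obj in response.json():
            detail_response = session.get(obj["detail"])
            detail_response.raise_for_status()
            all_details.append(detail_response.json())
    return all_details

details = fetch_all("http://api.gipod.vlaanderen.be/ws/v1/workassignment")
print(len(details))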
I have a document whose title contains "Hard work & Success", and I need to search for this document. If I type "Hardwork" (without the space) it doesn't return anything, but if I type "hard work" it returns the document.
This is the query I have used:
const search = qObject.search;
const payload = {
from: skip,
size: limit,
_source: [
"id",
"title",
"thumbnailUrl",
"youtubeUrl",
"speaker",
"standards",
"topics",
"schoolDetails",
"uploadTime",
"schoolName",
"description",
"studentDetails",
"studentId"
],
query: {
bool: {
must: {
multi_match: {
fields: [
"title^2",
"standards.standard^2",
"speaker^2",
"schoolDetails.schoolName^2",
"hashtags^2",
"topics.topic^2",
"studentDetails.studentName^2",
],
query: search,
fuzziness: "AUTO",
},
},
},
},
};
If I search for the title "hard work" (space included), then it returns data like this:
"searchResults": [
{
"_id": "92",
"_score": 19.04531,
"_source": {
"standards": {
"standard": "3",
"categoryType": "STANDARD",
"categoryId": "S3"
},
"schoolDetails": {
"categoryType": "SCHOOL",
"schoolId": "TPS123",
"schoolType": "PUBLIC",
"logo": "91748922mn8bo9krcx71.png",
"schoolName": "Carmel CMI Public School"
},
"studentDetails": {
"studentId": 270,
"studentDp": "164646972124244.jpg",
"studentName": "Nelvin",
"about": "good student"
},
"topics": {
"categoryType": "TOPIC",
"topic": "Motivation",
"categoryId": "MY"
},
"youtubeUrl": "https://www.youtube.com/watch?v=wermQ",
"speaker": "Anna Maria Siby",
"description": "How hardwork leads to success - motivational talk by Anna",
"id": 92,
"uploadTime": "2022-03-17T10:59:59.400Z",
"title": "Hard work & Success",
}
},
]
And if I search for the keyword "Hardwork" (without the space), it doesn't find this document. I need the spacing to be handled, or related terms to be matched against the search keyword. Is there any solution for this? Can you please help me out?
I made an example using a shingle analyzer.
Mapping:
{
"settings": {
"analysis": {
"filter": {
"shingle_filter": {
"type": "shingle",
"max_shingle_size": 4,
"min_shingle_size": 2,
"output_unigrams": "true",
"token_separator": ""
}
},
"analyzer": {
"shingle_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"shingle_filter"
]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "shingle_analyzer"
}
}
}
}
Now I tested it with your term. Note that the token "hardwork" was generated, but the other shingles were generated as well, which may be a problem for you.
GET idx-separator-words/_analyze
{
"analyzer": "shingle_analyzer",
"text": ["Hard work & Success"]
}
Results:
{
"tokens" : [
{
"token" : "hard",
"start_offset" : 0,
"end_offset" : 4,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "hardwork",
"start_offset" : 0,
"end_offset" : 9,
"type" : "shingle",
"position" : 0,
"positionLength" : 2
},
{
"token" : "hardworksuccess",
"start_offset" : 0,
"end_offset" : 19,
"type" : "shingle",
"position" : 0,
"positionLength" : 3
},
{
"token" : "work",
"start_offset" : 5,
"end_offset" : 9,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "worksuccess",
"start_offset" : 5,
"end_offset" : 19,
"type" : "shingle",
"position" : 1,
"positionLength" : 2
},
{
"token" : "success",
"start_offset" : 12,
"end_offset" : 19,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}
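With that mapping in place, a plain match query for the unspaced term should now find the document, since "Hardwork" is analyzed into the same "hardwork" shingle at search time. A quick way to verify from Python, assuming the idx-separator-words example index above was created with that mapping and runs on localhost:

import json
import requests

ES = "http://localhost:9200/idx-separator-words"

# Index the sample document (the index must already have the shingle mapping)
requests.post(ES + "/_doc/92?refresh=true",
              json={"title": "Hard work & Success"})

# Search for the unspaced term
query = {"query": {"match": {"title": {"query": "Hardwork", "fuzziness": "AUTO"}}}}
hits = requests.post(ES + "/_search", json=query).json()["hits"]["hits"]
print(json.dumps(hits, indent=2))  # the "Hard work & Success" document comes back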
I am new to contract testing and want to write a Groovy contract for a response that contains sub-objects with strings, lists of dictionaries, and so on.
I have a response like this:
{
"data": [
{
"categories": [
{
"categories": [],
"oid": "abc",
"type": "xyz"
},
{
"categories": [],
"oid": "abb",
"type": "xyy"
}
],
"oid": "ab"
}
],
"meta": {
"datatype": "text",
"language": "x",
"market": "qw",
"provider": "AFP",
"status": "ok",
"statusInfo": {},
"supportedLanguages": [
"x"
]
}
}
and for that I have written the following contract:
Contract.make {
    request {
        method 'GET'
        url '/foo'
    }
    response {
        status 200
        body(
            "data" : [
                (
                    "categories": [
                        (
                            "categories" : [],
                            "oid" : anyNonEmptyString(),
                            "type" : "xyz"
                        ),
                        (
                            "categories" : [],
                            "oid" : anyNonEmptyString(),
                            "type" : "xyy"
                        )
                    ]
                    "oid" : regex('\w')
                )
            ],
            "meta" : (
                "datatype": "text",
                "language": "x",
                "market": "qw",
                "provider": "AFP",
                "status": "ok",
                "statusInfo": (),
                "supportedLanguages": ["x"]
            )
        )
        headers {
            contentType(applicationJson())
        }
    }
}
But it is not working properly.
Can you help me see what I am doing wrong here?
Your contract is invalid: you're using parentheses where you should be using brackets. In Groovy, list and map literals both use square brackets ([...] and [key: value]), an empty map is written [:], and parentheses only group expressions.
Contract.make {
    request {
        method 'GET'
        url '/foo'
    }
    response {
        status 200
        body(
            "data" : [
                [ // was (
                    "categories": [
                        [ // was (
                            "categories" : [],
                            "oid" : anyNonEmptyString(),
                            "type" : "xyz"
                        ], // was )
                        [ // was (
                            "categories" : [],
                            "oid" : anyNonEmptyString(),
                            "type" : "xyy"
                        ] // was )
                    ], // a comma was also missing here
                    "oid" : regex('\\w+') // was regex('\w'); \w is not a valid escape in a Groovy string
                ] // was )
            ],
            "meta" : [ // was (
                "datatype": "text",
                "language": "x",
                "market": "qw",
                "provider": "AFP",
                "status": "ok",
                "statusInfo": [:], // was ()
                "supportedLanguages": ["x"]
            ] // was )
        )
        headers {
            contentType(applicationJson())
        }
    }
}
I have data like below:
data = [
{
"name": "test4",
"datapoints": [
[currentTimestamp, count]
],
"tags": {
"name" : "MyName",
"dept" : "Engineering",
"city" : "Delhi",
"state": "Delhi",
"country" : "India"
}
}
]
And I am sending the data to the KairosDB server using a Python script like this:
response = requests.post("http://localhost:8080" + "/api/v1/datapoints", json.dumps(data))
I know this data will be stored in three different tables:
1. string_index
2. row_keys
3. data_points
And my query is:
{
"metrics": [
{
"tags": {},
"name": "test4",
"aggregators": [
{
"name": "sum",
"sampling": {
"value": "1",
"unit": "milliseconds"
}
}
]
}
],
"plugins": [],
"cache_time": 0,
"start_absolute": 1529346600000
}
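For reference, here is a minimal sketch of how that query could be submitted from the same Python script (the /api/v1/datapoints/query endpoint on the same localhost server; printing the raw response is just for illustration):

import json
import requests

query = {
    "metrics": [
        {
            "tags": {},
            "name": "test4",
            "aggregators": [
                {"name": "sum", "sampling": {"value": "1", "unit": "milliseconds"}}
            ]
        }
    ],
    "plugins": [],
    "cache_time": 0,
    "start_absolute": 1529346600000
}

response = requests.post("http://localhost:8080" + "/api/v1/datapoints/query",
                         json.dumps(query))
print(response.json())  # aggregated datapoints for metric "test4"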
Now I want to know how the data will be fetched from those three tables, i.e. what the flow of data retrieval from Cassandra looks like.
Thanks in advance.
I'm migrating data from Mongo to Arango and I need to reproduce a $group aggregation. I have successfully reproduced the results, but I'm concerned that my approach may be sub-optimal. Can the AQL be improved?
I have a collection of data that looks like this:
{
"_id" : ObjectId("5b17f9d85b2c1998598f054e"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b1808145b2c1998598f054f"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b18083c5b2c1998598f0550"),
"department" : "Development",
"region" : "Europe"
}
{
"_id" : ObjectId("5b1809a75b2c1998598f0551"),
"department" : "Sales"
}
Note that the value can be a string, an array, or not present.
In Mongo I'm using the following code to aggregate the data:
db.test.aggregate([
{
$unwind:{
path:"$department",
preserveNullAndEmptyArrays: true
}
},
{
$unwind:{
path:"$region",
preserveNullAndEmptyArrays: true
}
},
{
$group:{
_id:{
department:{ $ifNull: [ "$department", "null" ] },
region:{ $ifNull: [ "$region", "null" ] },
},
count:{$sum:1}
}
}
])
In Arango I'm using the following AQL:
FOR i IN test
LET FIELD1=(FOR a IN APPEND([],NOT_NULL(i.department,"null")) RETURN a)
LET FIELD2=(FOR a IN APPEND([],NOT_NULL(i.region,"null")) RETURN a)
FOR f1 IN FIELD1
FOR f2 IN FIELD2
COLLECT id={department:f1,region:f2} WITH COUNT INTO counter
RETURN {_id:id,count:counter}
Edit:
APPEND() is used to convert string values into an array.
Both produce results that look like this:
{
"_id" : {
"department" : "Marketing",
"region" : "US"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Development",
"region" : "Europe"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Sales",
"region" : "null"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Marketing",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "US"
},
"count" : 2.0
}
Your approach seems alright. I would suggest using TO_ARRAY() instead of APPEND() though, to make it easier to understand.
Both functions skip null values, so it is unavoidable to either provide some placeholder, or test for null explicitly and return an array with a null value (or whatever works best for you):
FOR doc IN test
FOR field1 IN doc.department == null ? [ null ] : TO_ARRAY(doc.department)
FOR field2 IN doc.region == null ? [ null ] : TO_ARRAY(doc.region)
COLLECT department = field1, region = field2
WITH COUNT INTO count
RETURN { _id: { department, region }, count }
Collection test:
[
{
"_key": "5b17f9d85b2c1998598f054e",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b18083c5b2c1998598f0550",
"department": "Development",
"region": "Europe"
},
{
"_key": "5b1808145b2c1998598f054f",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b1809a75b2c1998598f0551",
"department": "Sales"
}
]
Result:
[
{
"_id": {
"department": "Development",
"region": "Europe"
},
"count": 1
},
{
"_id": {
"department": "Marketing",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Marketing",
"region": "US"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": null
},
"count": 1
},
{
"_id": {
"department": "Sales",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": "US"
},
"count": 2
}
]
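For completeness, if you are driving the migration from Python, the AQL above can be executed with the python-arango client. A sketch, where the localhost URL, _system database and root credentials are assumptions for illustration:

from arango import ArangoClient

client = ArangoClient(hosts="http://localhost:8529")
db = client.db("_system", username="root", password="")

aql = """
FOR doc IN test
    FOR field1 IN doc.department == null ? [ null ] : TO_ARRAY(doc.department)
        FOR field2 IN doc.region == null ? [ null ] : TO_ARRAY(doc.region)
            COLLECT department = field1, region = field2
            WITH COUNT INTO count
            RETURN { _id: { department, region }, count }
"""

for row in db.aql.execute(aql):  # cursor over the grouped counts
    print(row)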
I am using ElasticSearch river to index a CouchDB database of tweets.
The "created_at" field doesn't conform to the "date" type and gets indexed as a String.
How would I start a river with explicitly specifying that "created_at" is a Date, so that I could do range queries on it?
I tried the following river request, but it didn't work and the field was still indexed as a String:
curl -XPUT 'localhost:9200/_river/my_db/_meta' -d '{
"type" : "couchdb",
"couchdb" : {
"host" : "localhost",
"port" : 5984,
"db" : "testtweets",
"filter" : null
},
"index" : {
"index" : "my_testing",
"type" : "my_datetesting",
"properties" : {"created_at": {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss"
}
},
"bulk_size" : "100",
"bulk_timeout" : "10ms"
}
}'
My data looks like this:
{
"_id": "262856000481136640",
"_rev": "1-0ed7c0fe655974e236814184bef5ff16",
"contributors": null,
"truncated": false,
"text": "RT #edoswald: Ocean City MD first to show that #Sandy is no joke. Pier badly damaged, sea nearly topping the seawall http://t.co/D0Wwok4 ...",
"author_name": "Casey Strader",
"author_created_at": "2011-04-21 20:00:32",
"author_description": "",
"author_location": "",
"author_geo_enabled": false,
"source": "Twitter for iPhone",
"retweeted": false,
"coordinates": null,
"author_verified": false,
"entities": {
"user_mentions": [
{
"indices": [
3,
12
],
"id_str": "10433822",
"id": 10433822,
"name": "Ed Oswald",
"screen_name": "edoswald"
}
],
"hashtags": [
{
"indices": [
47,
53
],
"text": "Sandy"
}
],
"urls": [
{
"indices": [
117,
136
],
"url": "http://t.co/D0Wwok4",
"expanded_url": "http://t.co/D0Wwok4",
"display_url": "t.co/D0Wwok4"
}
]
},
"in_reply_to_screen_name": null,
"author_id_str": "285792303",
"retweet_count": 98,
"id_str": "262856000481136640",
"favorited": false,
"source_url": "http://twitter.com/download/iphone",
"author_screen_name": "Casey_Rae22",
"geo": null,
"in_reply_to_user_id_str": null,
"author_time_zone": "Eastern Time (US & Canada)",
"created_at": "2012-10-29 09:58:48",
"in_reply_to_status_id_str": null,
"place": null
}
Thanks!