Importation of method EF 3.0 - trouble with results - brightway

I wrote a script to import the characterization factors of the LCIA method EF 3.0 (adapted) into Brightway. I think it works fine, as I see the right characterization factors in the Activity Browser (e.g. for the Climate Change method), but when I run calculations with the method, the results are not the same as in SimaPro (where I got the CSV import file from). For instance, the result is 0 for the Climate Change method. Do you know what the issue could be?
It seems that the units are different but it is the same for the other methods that are available on Brightway.
Besides, I saw on another question that there would be a method implemented to import the EF 3.0 method, is it available yet ?
Thank you very much for your help.
Code of the importation script :
import brightway2 as bw
import csv
import uuid
from bw2data import mapping
from bw2data.utils import recursive_str_to_unicode
class import_method_EF:
    """Import the EF LCIA method from a SimaPro CSV export into Brightway.

    The CSV file is read with ``;`` as delimiter. Every ``Impact category``
    section becomes one Brightway method named ``(method name, category
    name)``. Substances that cannot be matched to an existing biosphere flow
    are added to the biosphere database as new flows.
    """

    def __init__(
        self,
        project_name,
        name_file,
    ):
        """Store the import settings and build the translation tables.

        :param project_name: str, Brightway project that receives the method
        :param name_file: str, path of the SimaPro CSV export
        """
        self.project_name = project_name
        self.name_file = name_file
        self.db_biosphere = bw.Database('biosphere3')
        # Correspondence between the SimaPro sub-compartments and the
        # ecoinvent subcategories. An empty value means "no subcategory".
        self.dict_categories = {
            'high. pop.': 'urban air close to ground',
            'low. pop.': 'low population density, long-term',
            'river': 'surface water',
            'in water': 'in water',
            '(unspecified)': '',
            'ocean': 'ocean',
            'indoor': 'indoor',
            'stratosphere + troposphere': 'lower stratosphere + upper troposphere',
            'low. pop., long-term': 'low population density, long-term',
            'groundwater, long-term': 'ground-, long-term',
            'agricultural': 'agricultural',
            'industrial': 'industrial',
        }
        # Correspondence between unit abbreviations and ecoinvent unit names.
        self.dict_units = {
            'kg': 'kilogram',
            'kWh': 'kilowatt hour',
            'MJ': 'megajoule',
            'p': 'p',
            'unit': 'unit',
            'km': 'kilometer',
            'my': 'meter-year',
            'tkm': 'ton kilometer',
            'm3': 'cubic meter',
            'm2': 'square meter',
            'kBq': 'kilo Becquerel',
            'm2a': 'm2a',  # to be changed
        }

    def importation(self):
        """
        Makes the importation from the Simapro CSV file to Brightway.

        Substance rows are read as
        ``[compartment, sub-compartment, name, CAS number, factor, unit]``.

        : raises : ValueError if impact categories were found but no
                   ``Name`` row gives the name of the method.
        """
        # Set the current project
        bw.projects.set_current(self.project_name)
        self.data = self.open_CSV(self.name_file, [])
        n_rows = len(self.data)
        name_method = None
        list_methods = []
        new_flows = []
        for i, row in enumerate(self.data):
            if row == ['Name']:
                name_method = self.data[i + 1][0]
            if row == ['Impact category']:
                header = self.data[i + 1]
                list_flows = []
                # Substance rows start four rows below the section marker and
                # run until the first row with at most one cell. The bound
                # check stops cleanly when the section is the last one.
                j = 4
                while i + j < n_rows and len(self.data[i + j]) > 1:
                    line = self.data[i + j]
                    amount = self._parse_amount(line[4])
                    code = self.get_biosphere_code(line[2], line[1], line[0].lower())
                    if code == 0:
                        found, code = self.find_if_already_new_flow(i + j, new_flows)
                        if not found:
                            code = self._new_code()
                            new_flows.append({
                                'amount': amount,
                                'CAS number': line[3],
                                'categories': self._categories(line[0].lower(), line[1]),
                                'name': line[2],
                                'unit': self.dict_units[line[5]],
                                'type': 'biosphere',
                                'code': code,
                            })
                    list_flows.append(((self.db_biosphere.name, code), amount))
                    j += 1
                list_methods.append({'name': header[0],
                                     'unit': header[1],
                                     'flows': list_flows})
        # Fail before anything is written rather than half-way through.
        if list_methods and name_method is None:
            raise ValueError(
                "No 'Name' row found in {}: the method cannot be named".format(self.name_file)
            )
        new_flows = recursive_str_to_unicode(
            dict(self._format_flow(flow) for flow in new_flows)
        )
        if new_flows:
            print('new flows :', len(new_flows))
            self.new_flows = new_flows
            biosphere = bw.Database(self.db_biosphere.name)
            biosphere_data = biosphere.load()
            biosphere_data.update(new_flows)
            biosphere.write(biosphere_data)
            print('biosphere_data :', len(biosphere_data))
        for entry in list_methods:
            method = bw.Method((name_method, entry['name']))
            method.register(**{'unit': entry['unit'],
                               'description': ''})
            method.write(entry['flows'])
            print(method.metadata)
            method.load()

    def open_CSV(self, CSV_file_name, list_rows):
        '''
        Opens a CSV file and gets a list of the rows.
        : param : CSV_file_name = str, name of the CSV file (must be in the working directory)
        : param : list_rows = list, list to get the rows (extended in place)
        : return : list_rows = list, list of the rows
        '''
        # newline='' lets the csv module handle line endings itself.
        with open(CSV_file_name, 'rt', newline='') as csvfile:
            list_rows.extend(csv.reader(csvfile, delimiter=';'))
        return list_rows

    def get_biosphere_code(self, simapro_name, simapro_cat, type_biosphere):
        """
        Gets the Brightway code of a biosphere process given in a Simapro format.
        : param : simapro_name = str, name of the biosphere process in a Simapro format.
        : param : simapro_cat = str, category of the biosphere process (ex : high. pop., river, etc)
        : param : type_biosphere = str, type of the biosphere process (ex : Emissions to water, etc)
        : return : 0 if the process is not found in biosphere, the code otherwise
        : raises : KeyError if simapro_cat is not a key of dict_categories
        """
        # NOTE(review): this drops the last characters whenever the marker
        # appears anywhere in the name, not only as a suffix - confirm
        # against the names used in the CSV export.
        if 'GLO' in simapro_name or 'RER' in simapro_name:
            simapro_name = simapro_name[:-5]
        if '/m3' in simapro_name:
            simapro_name = simapro_name[:-3]
        categories = self._categories(type_biosphere, simapro_cat)
        # Search in the biosphere database, depending on the category
        candidates = self.db_biosphere.search(simapro_name, filter={'categories': categories})
        # The categories are compared again exactly; presumably the search is
        # a fuzzy text search - TODO confirm. A flow whose name is identical
        # wins over the first flow that only shares the categories.
        fallback = 0
        for act in candidates:
            if act['categories'] != categories:
                continue
            if act['name'] == simapro_name:
                return act['code']
            if fallback == 0:
                fallback = act['code']
        return fallback

    def _categories(self, type_biosphere, simapro_cat):
        """Return the categories tuple for a compartment and sub-compartment.

        An empty sub-compartment, or one that dict_categories maps to an
        empty string such as '(unspecified)', gives the one-element tuple
        ``(type_biosphere,)`` instead of ``(type_biosphere, '')``.
        """
        subcategory = self.dict_categories[simapro_cat] if simapro_cat else ''
        if subcategory:
            return (type_biosphere, subcategory)
        return (type_biosphere,)

    @staticmethod
    def _parse_amount(text):
        """Convert a factor written with a decimal comma or point to a float."""
        return float(text.replace(',', '.'))

    def _new_code(self):
        """Return a random UUID string that is not yet a key of ``mapping``."""
        code = str(uuid.uuid4())
        while (self.db_biosphere.name, code) in mapping:
            code = str(uuid.uuid4())
        return code

    def _format_flow(self, cf):
        """Return the ``(key, dataset)`` pair written to the biosphere for a new flow."""
        # TODO
        return (self.db_biosphere.name, cf['code']), {
            'exchanges': [],
            'categories': cf['categories'],
            'name': cf['name'],
            'type': ("resource" if cf["categories"][0] == "resource"
                     else "emission"),
            'unit': cf['unit'],
        }

    def find_if_already_new_flow(self, n, new_flows):
        """
        Tells whether the substance of row n is already among the new flows.

        A pending flow matches when both its name and its categories are
        those of row n, so the same substance in two compartments is kept
        as two distinct flows.
        : param : n = int, index of the substance row in self.data
        : param : new_flows = list, new flows created so far
        : return : (True, code) if a matching flow exists, (False, 0) otherwise
        """
        row = self.data[n]
        categories = self._categories(row[0].lower(), row[1])
        for flow in new_flows:
            if flow['name'] == row[2] and flow.get('categories', categories) == categories:
                return True, flow['code']
        return False, 0
Edit : I made a modification in the get_biosphere_code method and it works better (it was not finding some biosphere flows) but I still have important differences between the results I get on Brightway and the results I get on Simapro. My investigations led me to the following observations :
there are some differences between the ecoinvent activities, especially in their lists of biosphere flows (which is probably one source of the differences in the results): compared to the data in SimaPro, some flows are missing in Brightway and also in the ecoSpold data that was used for the import
it seems that the LCA calculation doesn't work the same way as regards the subcategories : for example, the biosphere flow Carbon dioxide, fossil (air,) is in the list of caracterization factors for the Climate Change method and when looking at the inventory in the Simapro LCA results, it appears that all the Carbon dioxide, fossil flows to air participate in the Climate Change impact, no matter what their subcategory is. But Brightway does not work this way and only takes into account the flows that are exactly the same, so it leads to important differences in the results.

In LCA there's no agreement on elementary flows and archetypical emission scenarios / context (https://doi.org/10.1007/s11367-017-1354-3), and implementations of the impact assessment methods differ (https://www.lifecycleinitiative.org/portfolio_category/lcia/).
It is not unusual that the same activity and same impact assessment method returns different results in different software. There are some attempts to improve the current practices (see e.g , https://github.com/USEPA/LCIAformatter).

Related

Regex Error and Improvement Driving Licence Data Extraction

I am trying to extract the Name, License No., Date of Issue and Validity from an image I processed using Pytesseract. I am quite confused by regex, but I went through some documentation and code examples on the web.
I got till here:
import pytesseract
import cv2
import re
import cv2
from PIL import Image
import numpy as np
import datetime
from dateutil.relativedelta import relativedelta
def _split_name(raw, labels=()):
    """Split an OCR line into (first names, last name).

    Every run of non-letters becomes one space, each string in ``labels`` is
    removed, and the last remaining word is taken as the last name.
    """
    cleaned = re.sub('[^a-zA-Z]+', ' ', raw)
    for label in labels:
        cleaned = cleaned.replace(label, '')
    parts = cleaned.strip().split(" ")
    return " ".join(parts[:-1]), parts[-1]


def _parse_license_text(txt):
    """Extract name, age and document number from the OCR text of a licence.

    Returns a dict with the keys 'firstName', 'lastName', 'age' and
    'documentNumber'; a value stays None when nothing matched.
    """
    data = {
        'firstName': None,
        'lastName': None,
        'age': None,
        'documentNumber': None
    }
    # (pattern of the licence number, separator removed from the match)
    number_patterns = (
        (r"[a-zA-Z][a-zA-Z]-\d{13}", '-'),
        (r"[a-zA-Z][a-zA-Z]\d{2} \d{11}", ' '),
    )
    # Splitting lines
    lines = txt.split('\n')
    for index, raw_line in enumerate(lines):
        s = raw_line.strip()
        if not s:
            continue
        if re.match(r".*Name|.*name|.*NAME", s):
            data['firstName'], data['lastName'] = _split_name(s, ('Name', 'name', 'NAME'))
        for pattern, separator in number_patterns:
            found = re.search(pattern, s)
            if not found:
                continue
            data['documentNumber'] = found.group().replace(separator, '')
            # Without a labelled name so far, the line after the licence
            # number is used as the name - if there is such a line.
            if not data['firstName'] and index + 1 < len(lines):
                data['firstName'], data['lastName'] = _split_name(lines[index + 1])
        if re.match(r".*DOB|.*dob|.*Dob", s):
            # The first run of four digits on the line is read as the year.
            yob = re.search(r'\d\d\d\d', re.sub('[^0-9]+', ' ', s))
            if yob:
                data['age'] = datetime.datetime.now().year - int(yob.group())
    return data


def driver_license(filename):
    """
    This function will handle the core OCR processing of images.

    The orientation reported by Tesseract is read first; when it is not 0
    the image is rotated and saved back to ``filename`` (the input file is
    overwritten). The image is then converted to gray, passed through a
    dilation and an erosion, and its text is parsed.

    :param filename: path of the image of the driving licence
    :return: dict with the keys 'firstName', 'lastName', 'age' and
        'documentNumber' (None for fields that were not found); the OCR text
        and the dict are also printed
    """
    i = cv2.imread(filename)
    newdata = pytesseract.image_to_osd(i)
    rotation = re.search(r'(?<=Rotate: )\d+', newdata)
    # No "Rotate:" entry is treated as "no rotation needed".
    angle = int(rotation.group(0)) if rotation else 0
    if angle != 0:
        with Image.open(filename) as picture:
            rotated = picture.rotate(360 - angle, expand=True)
            rotated.save(filename)
        i = cv2.imread(filename)
    # Convert to gray
    i = cv2.cvtColor(i, cv2.COLOR_BGR2GRAY)
    # Apply dilation and erosion to remove some noise
    kernel = np.ones((1, 1), np.uint8)
    i = cv2.dilate(i, kernel, iterations=1)
    i = cv2.erode(i, kernel, iterations=1)
    txt = pytesseract.image_to_string(i)
    print(txt)
    data = _parse_license_text(txt)
    print(data)
    return data
I need to extract the Validity and Issue Date as well. But not getting anywhere near it. Also, I have seen using regex shortens the code like a lot so is there any better optimal way for it?
My input data is a string somewhat like this:
Transport Department Government of NCT of Delhi
Licence to Drive Vehicles Throughout India
Licence No. : DL-0820100052000 (P) R
N : PARMINDER PAL SINGH GILL
: SHRI DARSHAN SINGH GILL
DOB: 10/05/1966 BG: U
Address :
104 SHARDA APPTT WEST ENCLAVE
PITAMPURA DELHI 110034
Auth to Drive Date of Issue
M.CYL. 24/02/2010
LMV-NT 24/02/2010
(Holder's Sig natu re)
Issue Date : 20/05/2016
Validity(NT) : 19/05/2021 : c
Validity(T) : NA Issuing Authority
InvCarrNo : NA NWZ-I, WAZIRPUR
Or like this:
in
Transport Department Government of NCT of Delhi
Licence to Drive Vehicles Throughout India
2
Licence No. : DL-0320170595326 () WN
Name : AZAZ AHAMADSIDDIQUIE
s/w/D : SALAHUDDIN ALI
____... DOB: 26/12/1992 BG: O+
\ \ Address:
—.~J ~—; ROO NO-25 AMK BOYS HOSTEL, J.
— NAGAR, DELHI 110025
Auth to Drive Date of Issue
M.CYL. 12/12/2017
4 wt 4
Iseue Date: 12/12/2017 a
falidity(NT) < 2037
Validity(T) +: NA /
Inv CarrNo : NA te sntian sana
Note: In the second example you wouldn't get the validity, will optimise the OCR for later. Any proper guide which can help me with regex which is a bit simpler would be good.
You can use this pattern: (?<=KEY\s*:\s*)\b[^\n]+ and replace KEY with the label of the field you want to extract (Issue Date, Licence No., and so on).
Also for this pattern, you need to use regex library.
Code:
import regex
# Sample OCR output used to demonstrate the look-behind pattern.
text1 = """
Transport Department Government of NCT of Delhi
Licence to Drive Vehicles Throughout India
Licence No. : DL-0820100052000 (P) R
N : PARMINDER PAL SINGH GILL
: SHRI DARSHAN SINGH GILL
DOB: 10/05/1966 BG: U
Address :
104 SHARDA APPTT WEST ENCLAVE
PITAMPURA DELHI 110034
Auth to Drive Date of Issue
M.CYL. 24/02/2010
LMV-NT 24/02/2010
(Holder's Sig natu re)
Issue Date : 20/05/2016
Validity(NT) : 19/05/2021 : c
Validity(T) : NA Issuing Authority
InvCarrNo : NA NWZ-I, WAZIRPUR
"""
# The keys are regex fragments, so they are written as raw strings: "\." and
# "\(" are not valid escape sequences in an ordinary string literal.
for key in (r'Issue Date', r'Licence No\.', r'N', r'Validity\(NT\)'):
    # Variable-width look-behind: everything after "<key> :" up to the end of the line.
    print(regex.findall(fr"(?<={key}\s*:\s*)\b[^\n]+", text1, regex.IGNORECASE))
Output:
['20/05/2016']
['DL-0820100052000 (P) R']
['PARMINDER PAL SINGH GILL']
['19/05/2021 : c']
You can also use re with a single regex based on alternation that will capture your keys and values:
import re

# OCR text of the licence; each field is written as "<label> : <value>".
text = "Transport Department Government of NCT of Delhi\nLicence to Drive Vehicles Throughout India\n\nLicence No. : DL-0820100052000 (P) R\nN : PARMINDER PAL SINGH GILL\n\n: SHRI DARSHAN SINGH GILL\n\nDOB: 10/05/1966 BG: U\nAddress :\n\n104 SHARDA APPTT WEST ENCLAVE\nPITAMPURA DELHI 110034\n\n\n\nAuth to Drive Date of Issue\nM.CYL. 24/02/2010\nLMV-NT 24/02/2010\n\n(Holder's Sig natu re)\n\nIssue Date : 20/05/2016\nValidity(NT) : 19/05/2021 : c\nValidity(T) : NA Issuing Authority\nInvCarrNo : NA NWZ-I, WAZIRPUR"
search_phrases = ['Issue Date', 'Licence No.', 'N', 'Validity(NT)']

# Escape every label so it is matched literally, then put the longest labels
# first so that they are tried before the shorter ones.
escaped_phrases = [re.escape(phrase) for phrase in search_phrases]
escaped_phrases.sort(key=len, reverse=True)
alternation = "|".join(escaped_phrases)

# Group 1 is the label, group 2 the rest of the line after the colon.
reg = r"\b({})\s*:\W*(.+)".format(alternation)
print(re.findall(reg, text, re.IGNORECASE))
Output of this short online Python demo:
[('Licence No.', 'DL-0820100052000 (P) R'), ('N', 'PARMINDER PAL SINGH GILL'), ('Issue Date', '20/05/2016'), ('Validity(NT)', '19/05/2021 : c')]
The regex is
\b(Validity\(NT\)|Licence\ No\.|Issue\ Date|N)\s*:\W*(.+)
See its online demo.
Details:
map(re.escape, search_phrases) - escapes all special chars in your search phrases to be used as literal texts in a regex (else, . will match any chars, ? won't match a ? char, etc.)
sorted(..., key=len, reverse=True) - sorts the search phrases by length in descending order (to get longer matches first)
"|".join(...) - creates an alternation pattern, a|b|c|...
r"\b({})\s*:\W*(.+)".format( ... ) - creates the final regex.
Regex details
\b - a word boundary (NOTE: replace with (?m)^ if your matches occur at the beginning of a line)
(Validity\(NT\)|Licence\ No\.|Issue\ Date|N) - Group 1: one of the search phrases
\s* - zero or more whitespaces
: - a colon
\W* - zero or more non-word chars
(.+) - (capturing) Group 2: one or more chars other than line break chars, as many as possible.

How do i resolve \n for string concatenation in python

I have used "\n" for string concatenation in my Python code, but it is not working: the "\n" is appended as literal text, together with the context, to the list data.
# (question, value) pairs to be rendered as two-line "User / Ulta" strings.
context_data = [('What is the available storage capability', '124578'), ('what is the available budget set', '12587'), ('what is the available budget set', '12587')]
# One string per pair; the newline sits between the two parts.
data = ["User : " + question + " \nUlta : " + reply for question, reply in context_data]
# Printing the list shows the repr of each string, so the newline appears as \n.
print(data)
['User : What is the available storage capability \nUlta : 124578', 'User : what is the available budget set \nUlta : 12587', 'User : what is the available budget set \nUlta : 12587']
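The "\n" is stored correctly; it only looks wrong because you print the whole list. Printing a list shows the repr of each element, in which a newline is displayed as the two characters \n. Print each string on its own instead of printing data.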
Code:
# (question, value) pairs to be printed as two-line "User / Ulta" entries.
context_data = [('What is the available storage capability', '124578'), ('what is the available budget set', '12587'), ('what is the available budget set', '12587')]
for question, reply in context_data:
    # Printing the string itself (not a list holding it) renders the newline.
    s = "".join(("User : ", question, " \nUlta : ", reply))
    print(s)
Output:
User : What is the available storage capability
Ulta : 124578
User : what is the available budget set
Ulta : 12587
User : what is the available budget set
Ulta : 12587

how to create list of dictionary in this code?

I have some names and scores as follows
# Scores per subject: each subject maps to a {student: score} dict. A student
# who is absent from a subject has no entry there.
# NOTE(review): the name `input` shadows Python's built-in input(); a name such
# as `scores_by_subject` would avoid that.
input = {
'Maths': dict(Mohsen=19, Sadegh=18, Hafez=15),
'Physics': dict(Sadegh=16, Hafez=17, Mohsen=17),
'Chemistry': dict(Hafez=13),
'Literature': dict(Sadegh=14),
'Biology': dict(Mohsen=16, Sadegh=10),
}
If a person has no score for a lesson, that score should be counted as zero. I also want the average of each person's scores, and the final list sorted by average, so that the output looks like this:
# Expected result: one dict per student holding every subject (0 where the
# student has no score) and the average over all five subjects, listed from
# the highest average to the lowest.
answer = [
dict(Name='Sadegh', Literature=14, Chemistry=0, Maths=18, Physics=16, Biology=10, Average=11.6),
dict(Name='Mohsen', Maths=19, Physics=17, Chemistry=0, Biology=16, Literature=0, Average=10.4),
dict(Name='Hafez', Chemistry=13, Biology=0, Physics=17, Literature=0, Maths=15, Average=9),
]
how to do it?
Essentially, you have a dictionary in which the information is arranged by subject: for each subject, you have the students' marks. You want to collect all the information related to each student in a separate dictionary.
One of the approaches which can try, is as below:
Try converting the data which you have into student specific data and then you can calculate the Average of the Marks of all subjects for that student. There is a sample code below.
Please do note that, this is just a sample and you should be trying
out a solution by yourself. There are many alternate ways of doing it and you should explore them by yourself.
The below code works with Python 2.7
from __future__ import division
def convert_subject_data_to_student_data(subject_dict):
    """Turn per-subject scores into one record per student.

    Each record holds the student's 'Name', a score for every subject of
    ``subject_dict`` (0 when the student has no score for it) and the
    'Average' over all subjects. The records are ordered from the highest
    average to the lowest; students with equal averages keep the order in
    which they were first met.

    :param subject_dict: dict mapping a subject to a {student: score} dict
    :return: list of dicts, one per student; the list is also printed
    """
    subjects = list(subject_dict)

    # Regroup the scores by student.
    scores_by_student = {}
    for subject, scores in subject_dict.items():
        for student, score in scores.items():
            scores_by_student.setdefault(student, {})[subject] = score

    student_list = []
    for student, scores in scores_by_student.items():
        st_dict = {'Name': student}
        for subject in subjects:
            st_dict[subject] = scores.get(subject, 0)
        # Missing subjects count as 0, so the sum is divided by the number of
        # all subjects; float() keeps this a true division on Python 2 too.
        st_dict['Average'] = sum(scores.values()) / float(len(subjects))
        student_list.append(st_dict)

    student_list.sort(key=lambda record: record['Average'], reverse=True)
    print(student_list)
    return student_list


if __name__ == "__main__":
    subject_dict = {
        'Maths': dict(Mohsen=19, Sadegh=18, Hafez=15),
        'Physics': dict(Sadegh=16, Hafez=17, Mohsen=17),
        'Chemistry': dict(Hafez=13),
        'Literature': dict(Sadegh=14),
        'Biology': dict(Mohsen=16, Sadegh=10),
    }
    convert_subject_data_to_student_data(subject_dict)
sample_input = {
'Maths': dict(Mohsen=19, Sadegh=18, Hafez=15),
'Physics': dict(Sadegh=16, Hafez=17, Mohsen=17),
'Chemistry': dict(Hafez=13),
'Literature': dict(Sadegh=14),
'Biology': dict(Mohsen=16, Sadegh=10),
}
def foo(lessons):
    """Regroup per-lesson scores into one record per user.

    Each record starts as ``{'name': user}`` and receives one entry per
    lesson in which the user has a score. The records are returned as the
    values view of the internal dict, in order of first appearance.
    """
    result = {}
    for lesson, scores in lessons.items():
        for user in scores:
            # Create the user's record on first sight, then add the lesson.
            record = result.setdefault(user, dict(name=user))
            record.setdefault(lesson, scores.get(user, 0))
    return result.values()
#if name == '__main__':
print(foo(sample_input))

PyMongo: how to query a series and find the closest match

This is a simplified example of how my data is stored in MongoDB of a single athlete:
{ "_id" : ObjectId('5bd6eab25f74b70e5abb3326'),
"Result" : 12,
"Race" : [0.170, 4.234, 9.170],
"Painscore" : 68,
}
Now when this athlete has performed a race I want to search for the race that was MOST similar to the current one, and hence I want to compare both painscores.
IOT get the best 'match' I tried this:
# Race to compare against the stored ones.
query = [0.165, 4.031, 9.234]
race_fields = {"_id": 1, "Race": 1}
races = db[athlete]

# Up to two documents whose Race compares as <= query, highest Race first.
closestBelow = races.find({'Race': {"$lte": query}}, race_fields).sort("Race", -1).limit(2)
for document in closestBelow:
    print(document)

# Up to two documents whose Race compares as >= query, lowest Race first.
closestAbove = races.find({'Race': {"$gte": query}}, race_fields).sort("Race", 1).limit(2)
for document in closestAbove:
    print(document)
This does not seem to work.
Question 1: How should I write the query in order to find the race in Mongo that matches best (is closest), taking into account that two races are almost never exactly the same?
Question 2: How can I see a match percentage per document, so that the athlete knows how seriously he should take the pain score?
Thank you.
Thanks to this website I found a solution: http://dataaspirant.com/2015/04/11/five-most-popular-similarity-measures-implementation-in-python/
Step 1: find your query;
Step 2: make a first selection based on query and append the results into a list (for example average);
Step 3: use a for loop to compare every item in the list with your query. Use Euclidean distance for this;
Step 4: when you have your matching processed, define the best match into a variable.
from pymongo import MongoClient
client = MongoClient('mongodb://localhost:27017/')
Database = 'Firstclass'
def newSearch(Athlete):
    """Find the stored race that is most similar to the athlete's latest race.

    The latest document (highest ``_id``) is the reference. Up to 15 of the
    most recent documents whose 'Average' lies between 0.9 and 1.1 times the
    reference's 'Average' are candidates, and the candidate whose 'Race' has
    the smallest Euclidean distance to the reference race is the best match.

    :param Athlete: name of the collection holding the athlete's documents
    :return: ``(_id, Race)`` of the best match, or None when the collection
        is empty, the reference has no 'Average' or 'Race', or no other
        document qualifies
    """
    import math  # local import: math is not imported at file level

    collection = client[Database][Athlete]

    # STEP 1: the latest document is the query; the projection must name
    # 'Average' for the value to be returned.
    lastDoc = list(
        collection.find({}, {'_id': 1, 'Race': 1, 'Average': 1}).sort('_id', -1).limit(1)
    )
    if not lastDoc:
        return None
    reference = lastDoc[0]
    average = reference.get('Average')
    reference_race = reference.get('Race')
    if average is None or reference_race is None:
        return None

    # STEP 2: first selection on the average, without the reference itself.
    query = {'$and': [{'Average': {'$gte': average * 0.9}},
                      {'Average': {'$lte': average * 1.1}}]}
    funnel = collection.find(query, {'_id': 1, 'Race': 1}).sort('_id', -1).limit(15)
    compareListID = []
    compareListRace = []
    for x in funnel:
        if x.get('_id') != reference.get('_id') and x.get('Race') is not None:
            compareListID.append(x.get('_id'))
            compareListRace.append(x.get('Race'))
    if not compareListRace:
        return None

    # STEP 3: Euclidean distance between the reference race and each candidate.
    ESlist = [
        math.sqrt(sum((a - b) ** 2 for a, b in zip(reference_race, race)))
        for race in compareListRace
    ]

    # STEP 4: the smallest distance is the closest race.
    best = min(range(len(ESlist)), key=ESlist.__getitem__)
    matchObjID = compareListID[best]
    matchRace = compareListRace[best]
    return matchObjID, matchRace
newSearch('Jim')

AvroTypeException: When writing in python3

My avsc file is as follows:
{"type":"record",
"namespace":"testing.avro",
"name":"product",
"aliases":["items","services","plans","deliverables"],
"fields":
[
{"name":"id", "type":"string" ,"aliases":["productid","itemid","item","product"]},
{"name":"brand", "type":"string","doc":"The brand associated", "default":"-1"},
{"name":"category","type":{"type":"map","values":"string"},"doc":"the list of categoryId, categoryName associated, send Id as key, name as value" },
{"name":"keywords", "type":{"type":"array","items":"string"},"doc":"this helps in long run in long run analysis, send the search keywords used for product"},
{"name":"groupid", "type":["string","null"],"doc":"Use this to represent or flag value of group to which it belong, e.g. it may be variation of same product"},
{"name":"price", "type":"double","aliases":["cost","unitprice"]},
{"name":"unit", "type":"string", "default":"Each"},
{"name":"unittype", "type":"string","aliases":["UOM"], "default":"Each"},
{"name":"url", "type":["string","null"],"doc":"URL of the product to return for more details on product, this will be used for event analysis. Provide full url"},
{"name":"imageurl","type":["string","null"],"doc":"Image url to display for return values"},
{"name":"updatedtime", "type":"string"},
{"name":"currency","type":"string", "default":"INR"},
{"name":"image", "type":["bytes","null"] , "doc":"fallback in case we cant provide the image url, use this judiciously and limit size"},
{"name":"features","type":{"type":"map","values":"string"},"doc":"Pass your classification attributes as features in key-value pair"}
]}
I am able to parse the schema, but when I try to write a record with it as follows, I keep getting an AvroTypeException. What am I missing? This is in Python 3, and I verified that the schema is well-formed JSON, too.
from avro import schema as sc
from avro import datafile as df
from avro import io as avio
import os
_prodschema = 'product.avsc'
_namespace = 'testing.avro'
dirname = os.path.dirname(__file__)
avroschemaname = os.path.join(dirname, _prodschema)

# Read the schema as text; the with-statement closes the file.
with open(avroschemaname, 'r') as f:
    sch = f.read()
proschema = sc.Parse(sch)
print("Schema processed")

writer = df.DataFileWriter(open(os.path.join(dirname, "products.json"), 'wb'),
                           avio.DatumWriter(), proschema)
try:
    print("Just about to append the json")
    # Every value has to match the type its field has in the schema.
    writer.append({"id": "23232",
                   "brand": "Relaxo",
                   # "map" fields take a dict, not a list holding a dict
                   "category": {"123": "shoe", "122": "accessories"},
                   "keywords": ["relaxo", "shoe"],
                   "groupid": "",
                   # "double" field: a number, not a string
                   "price": 799.99,
                   "unit": "Each",
                   "unittype": "Each",
                   "url": "",
                   "imageurl": "",
                   "updatedtime": "03/23/2017",
                   "currency": "INR",
                   # ["bytes", "null"] union: bytes (or None), not str
                   "image": b"",
                   "features": {"color": "black", "size": "10", "style": "contemperory"}
                   })
finally:
    # Close the writer even when append() rejects the datum.
    writer.close()
What am I missing here ?

Resources