How can i get IfcOpenShell for python to write with the same unicode as the file it reads? - python-3.x

I'm using IfcOpenshell to read an .ifc file. make some changes, then write it to a new .ifc file. But IfcOpenshell is not writing the unicode the same way as it reads it.
I'm creating a script that adds a pset with properties to each IfcElement. The values of these properties are copied from existing properties. So basically I'm creating a pset that gathers chosen information in a single place.
This has worked great until the existing values contained unicode utf-8.
It is read and decoded to show the correct value when printed, but it does not write the unicode the same way as it reads it.
I tried changing the Unicode encoding used in PyCharm, with no luck. I found similar posts elsewhere, but none of them had a fix.
From what i've read elsewhere it has something to do with the unicode encoder/decoder IfcOpenshell use, but i cant be sure.
def mk_pset():
# Load the property-set definition from the active sheet of workbook `b`
# (`b` is defined outside this block) into module-level globals, then call
# create_pset().
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements below has to be inferred.
global param_name
global param_type
global max_row
global param_map
wb = load_workbook(b)
sheet = wb.active
max_row = sheet.max_row
max_column = sheet.max_column  # assigned but not read again in this block
param_name = []
param_type = []
param_map=[]
global pset_name
# The property-set name is read from row 2, column 1.
pset_name = sheet.cell(row=2, column=1).value
# Rows 2..max_row: column 2 -> param_name, column 3 -> param_type, column 4 -> param_map.
for pm in range(2, max_row+1):
param_name.append((sheet.cell(pm, 2)).value)
param_type.append((sheet.cell(pm, 3)).value)
param_map.append((sheet.cell(pm,4)).value)
print(param_type,' - ',len(param_type))
print(param_name,' - ',len(param_name))
create_pset()
def create_pset():
# Open the IFC file at `ifc_loc`, and, unless a property set named `pset_name`
# already exists, create one per element from the values of existing
# properties whose first attribute equals an entry of `param_map`; the result
# is written to "<name>_Edited.ifc". Otherwise an error popup is shown.
# Reads the globals filled in by mk_pset() (param_name, param_type, param_map,
# pset_name).
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the statements below has to be inferred.
ifcfile = ifcopenshell.open(ifc_loc)
create_guid = lambda: ifcopenshell.guid.compress(uuid.uuid1().hex)
owner_history = ifcfile.by_type("IfcOwnerHistory")[0]
element = ifcfile.by_type("IfcElement")
sets = ifcfile.by_type("IfcPropertySet")
# Collect the names of all existing property sets (note: `list` shadows the builtin).
list = []
for sett in sets:
list.append(sett.Name)
myset = set(list)
global antall_parametere
global index
index = 0
antall_parametere = len(param_name)
if pset_name not in myset:
property_values = []
tot_elem = (len(element))
cur_elem = 1
for e in element:
start_time_e=time.time()
if not e.is_a() == 'IfcOpeningElement':
# NOTE(review): `type_element` is not defined anywhere in this block.
type_element.append(e.is_a())
for rel_e in e.IsDefinedBy:
if rel_e.is_a('IfcRelDefinesByProperties'):
# presumably rel_e[5] is the related property set and [4] its list of
# properties - TODO confirm against the IFC schema in use
if not rel_e[5][4] == None:
index = 0
# For every configured parameter, scan the properties of this set.
while index < antall_parametere:
try:
ind1 = 0
antall_ind1 = len(rel_e[5][4])
while ind1 < antall_ind1:
if rel_e[5][4][ind1][0] == param_map[index]:
try:
if not rel_e[5][4][ind1][2]==None:
# Re-create the value with the same entity type as the source value.
p_type = rel_e[5][4][ind1][2].is_a()
p_verdi =rel_e[5][4][ind1][2][0]
p_t=param_type[index]
property_values.append(ifcfile.createIfcPropertySingleValue(param_name[index], param_name[index],ifcfile.create_entity(p_type,p_verdi),None),)
ind1 += 1
else:
ind1 +=1
except TypeError:
pass
break
else:
ind1 += 1
# NOTE(review): `AttributeError and IndexError` evaluates to `IndexError`,
# so AttributeError is NOT caught here; a tuple would catch both.
except AttributeError and IndexError:
pass
index += 1
index = 0
property_set = ifcfile.createIfcPropertySet(create_guid(), owner_history, pset_name, pset_name,property_values)
ifcfile.createIfcRelDefinesByProperties(create_guid(), owner_history, None, None, [e], property_set)
ifc_loc_edit = str(ifc_loc.replace(".ifc", "_Edited.ifc"))
property_values = []
print(cur_elem, ' av ', tot_elem, ' elementer ferdig. ',int(tot_elem-cur_elem),'elementer gjenstår. Det tok ',format(time.time()-start_time_e),' sekunder')
cur_elem += 1
ifcfile.write(ifc_loc_edit)
else:
###print("Pset finnes")
sg.PopupError("Pset er allerede oprettet i modell.")
I expect p_verdi written to be equal to the p_verdi read.
Original read (D\X2\00F8\X0\r):
#2921= IFCBUILDINGELEMENTPROXYTYPE('3QPADpsq71CHeCe7e3GDm5',#32,'D\X2\00F8\X0\r',$,$,$,$,'DA64A373-DB41-C131-1A0C-A07A0340DC05',$,.NOTDEFINED.);
Written (D\X4\000000F8\X0\r):
#2921=IFCBUILDINGELEMENTPROXYTYPE('3QPADpsq71CHeCe7e3GDm5',#32,'D\X4\000000F8\X0\r',$,$,$,$,'DA64A373-DB41-C131-1A0C-A07A0340DC05',$,.NOTDEFINED.);
Decoded to "Dør"
This also happens with hard (non-breaking) spaces:
('2\X2\00A0\X0\090')
prints correctly as:('2 090')
gets written:
('2\X4\000000A0\X0\090')
written form is unreadable by my ifc using software.

Not so much an answer as a workaround.
After more research I found out that most IFC-reading software does not seem to support X4 encoding, so I made a workaround with regex. Basically it finds every occurrence of \X4\0000 and replaces it with \X2\. This has worked with all the special characters I've encountered so far. But as stated, it is just a workaround that probably won't work for everyone.
def X4trans_2(target_file, temp_fil):
    r"""Copy *temp_fil* to *target_file*, turning \X4\ escapes into \X2\ ones.

    Every ``\X4\<8 hex digits>...\X0\`` run whose code points all start with
    ``0000`` (i.e. fit in 16 bits) is rewritten as
    ``\X2\<4 hex digits>...\X0\``. Each 8-digit group is shortened
    individually, so runs holding several characters stay valid. Runs that
    contain a code point above U+FFFF are left untouched because ``\X2\``
    cannot represent them.

    The converted text is first written to ``<target>_dec.ifc`` and then
    moved over *target_file*; *temp_fil* is deleted afterwards.

    Args:
        target_file: Path of the final file (replaced if it already exists).
        temp_fil: Path of the file to read; removed once it was converted.
    """
    from os import remove, rename
    from re import compile as compile_pattern

    # One \X4\ ... \X0\ run: one or more 8-hex-digit code points.
    x4_run = compile_pattern(r'\\X4\\((?:[0-9A-Fa-f]{8})+)\\X0\\')

    def to_x2(match):
        """Return the \\X2\\ spelling of one matched run, or the run unchanged."""
        digits = match.group(1)
        code_points = [digits[i:i + 8] for i in range(0, len(digits), 8)]
        if any(cp[:4] != '0000' for cp in code_points):
            # At least one character does not fit in 16 bits: keep \X4\.
            return match.group(0)
        return '\\X2\\' + ''.join(cp[4:] for cp in code_points) + '\\X0\\'

    dec_file = target_file.replace('.ifc', '_dec.ifc')
    with open(temp_fil, 'r') as r, open(dec_file, 'w', encoding='cp1252') as f:
        for line in r:
            f.write(x4_run.sub(to_x2, line))
    remove(temp_fil)
    # rename() refuses to overwrite on some platforms, so clear the target first.
    try:
        remove(target_file)
    except FileNotFoundError:
        pass
    rename(dec_file, target_file)
It basically opens the ifc as text, find and replace X4 with X2 and writes it again.

Related

Read out .csv and hand results to a dictionary

I am learning some coding, and I am stuck with an error I can't explain. Basically I want to read out a .csv file with birth statistics from the US to figure out the most popular name in the time recorded.
My code looks like this:
# Script as posted in the question: sums the Count column per name, then
# tries to find the largest total.
# NOTE(review): the indentation of this listing was lost.
# 0:Id, 1: Name, 2: Year, 3: Gender, 4: State, 5: Count
names = {} # initialise dict names
maximum = 0 # store for maximum
l = []
with open("Filepath", "r") as file:
for line in file:
l = line.strip().split(",")
try:
name = l[1]
if name in names:
# NOTE(review): `l(5)` calls the list instead of indexing it (`l[5]`); the
# resulting TypeError is swallowed by the bare `except` below.
names[name] = int(names[name]) + int(l(5))
else:
names[name] = int(l(5))
except:
continue
print(names)
# NOTE(review): the `max` function is only defined after this call.
max(names)
def max(values):
for i in values:
if names[i] > maximum:
# NOTE(review): this overwrites the stored count; `maximum` itself is never
# reassigned inside this function.
names[i] = maximum
else:
continue
return(maximum)
print(maximum)
It seems like the dictionary does not take any values at all since the print command does not return anything. Where did I go wrong (incidentally, the filepath is correct, it takes a while to get the result since the .csv is quite big. So my assumption is that I somehow made a mistake writing into the dictionary, but I was staring at the code for a while now and I don't see it!)
A few suggestions to improve your code:
names = {}  # total count per name
with open("Filepath", "r") as file:
    for line in file:
        l = line.strip().split(",")
        try:
            # Column 1 is the name, column 5 the count (see the question).
            name, count = l[1], int(l[5])
        except (IndexError, ValueError):
            # Header row, blank line or malformed record: skip it.
            continue
        names[name] = names.get(name, 0) + count
# (count, name) pairs sort by count first, so the largest total comes first.
maximum = [(v, k) for k, v in names.items()]
maximum.sort(reverse=True)
if maximum:
    print(maximum[0])
You will want to look into Python dictionaries and learn about get. It helps you accomplish the objective of making your names dictionary in less lines of codes (more Pythonic).
Also, you used def to generate a function but you never called that function. That is why it's not printing.
I propose the shorted code above. Ask if you have questions!
Figured it out.
I think there were a few flow issues: I called a function before defining it... is that an issue or is python okay with that?
Also I think I used max as a name for a variable, but there is a built-in function with the same name, that might cause an issue I guess?! Same with value
This is my final code:
names = {}  # total count per name
l = []


def maxval(val):
    """Return the (key, value) pair of *val* that has the largest value."""
    maxname = max(val.items(), key=lambda x: x[1])
    return maxname


with open("filepath", "r") as file:
    for line in file:
        l = line.strip().split(",")
        try:
            # Column 1 is the name, column 5 the count.
            name = l[1]
            names[name] = names.get(name, 0) + int(l[5])
        except (IndexError, ValueError):
            # Header row, blank line or malformed record: skip it.
            continue
#print(str(l))
#print(names)
print(maxval(names))

How to use 'for' loop to split values and create list of dictionaries?

Disclaimer: I am an absolute beginner in Python programming so please bear with me. I am taking a class for this and extremely desperate to get help.
I am creating a program that can read data from ANY text file which contains information like so:
Produce, Is it a fruit (Y/N)
String: "Apple","Y""Banana","Y""Pumpkin","N""Orange","Y""Eggplant","N"...
I need to convert the string to a list that will look like this:
"Apple","Y"
"Banana","Y"
"Pumpkin","N"
...
After that, I have to split/separate the values so they can fit into a dictionary that will look like this:
{"produce": xxx,"fruit": Yes/No}
For this task, I was told that I need to use the for loop to split the lines and create a list of dictionaries. But, I don't know how and where to put it.
Note that the program must be able to read data from any file. The user must also be able to modify whether the listed fruit/veg is indeed a fruit or not.
Thank you so much in advance!
I hope this is what you want...
string = "apple", "Y", "banana", "Y", "pumpkin", "N"
# Even positions hold the produce name, odd positions its Y/N flag.
dict = {name: flag for name, flag in zip(string[0::2], string[1::2])}
for k, v in dict.items():
    print(k, v)
So here I am after a lot of comments,
here is the suggested solution and this will work
# Adjacent string literals are concatenated, so this tuple is
# ("Apple", "YBanana", "YPumpkin", "NOrange", "YEggplant", "N"):
# every item after the first starts with the Y/N flag of the previous produce.
x = "Apple","Y""Banana","Y""Pumpkin","N""Orange","Y""Eggplant","N"
length = len(x)
mainList = []


def split_str(s):
    """Return the characters of *s* as a list."""
    return [ch for ch in s]


# The last item only carries a flag, so it never starts a new entry.
for i in range(length - 1):
    # The first item has no leading flag to strip.
    produce = x[i] if i == 0 else x[i][1:]
    is_fruit = split_str(x[i + 1])[0] == 'Y'
    mainList.append({"produce": produce, "fruit": 'Yes' if is_fruit else 'No'})
print(mainList)
online fiddle link:
https://pyfiddle.io/fiddle/b3de895b-8542-419d-841a-ad7ddf008d9a/?i=true
Thank you so much for those who answered my question. I was able to run this properly using the following codes:
# Read the contents from the file first.
def get_content(filename):
    """Return the text of *filename* with every double-quote character removed.

    The file is opened in text mode and closed again before returning.
    """
    with open(filename, "r") as f:
        return f.read().replace('"', '')
# Convert the contents to list of dictionaries (Y/N being a boolean).
def convert_to_list(content):
    """Turn "name,flag" lines into a list of produce dictionaries.

    The lines of *content* are sorted case-insensitively; blank lines are
    skipped. Each remaining line is split on commas.

    Args:
        content: Text with one ``name,flag`` record per line.

    Returns:
        A list of ``{'produce': name, 'fruit': bool}`` dictionaries, where
        ``fruit`` is True when the flag field contains ``'Y'``.

    Raises:
        IndexError: If a non-blank line contains no comma.
    """
    produce_list = []
    for x in sorted(content.split('\n'), key=str.lower):
        if not x.strip():
            # A trailing newline leaves an empty record behind.
            continue
        a = x.split(',')
        produce_list.append({'produce': a[0], 'fruit': 'Y' in a[1]})
    return produce_list
I was able to complete this with help outside the site. Thank you so much for everyone's input!

How to generate a list starting from a string and ending with another?

I am trying to read a list of wavelengths from a text file containing a lot of other information but I can't figure out how to make python only start adding to the list after 'wavelength'. I can get it to end at } however. I'm sure I'm missing something painfully obvious. My text file has wavelengths stored in this format:
info1 = {xxx}
info2 = {xxx}
Wavelength = {
1.1,
2.2,
3.3
}
info3 = {
1.1,
2.2
}
I was able to get it to read into a list by finding what lines the data was in and reading in that range, but I want this to be more adaptable.
# Snippet as posted in the question (indentation was lost in this listing).
wavelength = []
with open(header, 'r') as hdr:
for line in hdr:
# NOTE(review): 'wavlength' is misspelled and lower-case, while the sample
# file shown above uses 'Wavelength', so this test cannot match that line.
if 'wavlength' in line:
#add next lines to list until }
# NOTE(review): `float in line` tests the type object `float` against a
# string, which raises TypeError rather than detecting a number.
if float in line:
if '}' in line:
break
wavelength.append(line)
print(wavelength)
#output I want
[1.1,2.2,3.3]
#output I get
['}\n']
I know how to remove special characters and new lines so I'm not worried about that so much as reading the next lines until I hit }.
You can iterate through the file, and get all lines, then find the indexes of
Wavelength = { and the next }, then select the floats between them.
Getting all lines also helps you if you want to process more data using the lines later
wavelength = []
# Keep every non-blank line, stripped of surrounding whitespace.
with open('file.txt', 'r') as hdr:
    lines = []
    for raw in hdr:
        text = raw.strip()
        if text:
            lines.append(text)
# The block opens at 'Wavelength = {' and closes at the first '}' after it.
start_idx = lines.index('Wavelength = {')
end_idx = lines.index('}', start_idx)
# Lines strictly between the two markers are numbers with an optional comma.
wavelength = [float(item.strip(',')) for item in lines[start_idx + 1:end_idx]]
print(wavelength)
The output will be
[1.1, 2.2, 3.3]
One way is to simply check if the current line is a number.
for line in hdr.splitlines():
    # Data lines in the sample file look like "1.1," - drop the trailing
    # comma, otherwise float() rejects every one of them.
    try:
        num = float(line.strip().rstrip(','))
    except ValueError:
        pass  # not a number (assignments, braces, ...)
    else:
        wavelength.append(num)
The brackets and file format aren't taken into account, but this could be implemented by setting a boolean flag.
switch = False
...
with open('file.txt', 'r') as hdr:
    # A file object is iterated line by line; it has no splitlines() method.
    for line in hdr:
        if 'Wavelength' in line and '{' in line:
            switch = True
        elif '}' in line:
            switch = False
        elif switch:
            # Only lines inside the Wavelength block reach this branch.
            wavelength.append(float(line.strip().rstrip(',')))
And you could use regex to tidy it up if you like (e.g. re.search('Wavelength *{', line)).
You'll probably want to keep track of whether you've found 'wavelength' or not.
You could store it in a variable named found_wave_length which is initially False, but as soon as you find it, set to True. You only append lines if found_wave_length is true.
wavelength = []
found_wave_length = False
with open('file.txt', 'r') as hdr:
    for line in hdr:
        if 'Wavelength' in line:
            # The marker line itself is never collected.
            found_wave_length = True
        elif found_wave_length:
            # Inside the block: stop at the closing brace, keep everything else.
            if '}' in line:
                break
            wavelength.append(line)
print(wavelength)
Another solution without any loops. All string methods can be chained together for compactness.
with open(header, 'r') as f:
    s = f.read()
# Text between 'Wavelength = {' and the first '}' that follows it.
body = s.partition('Wavelength = {')[2].partition('}')[0]
# Entries are separated by ",\n"; float() ignores the remaining whitespace.
temp = body.split(',\n')
wavelength = list(map(float, temp))
Here's a simple short version of it. Start after 'Wavelength' , end at '}', lots of other info in the file, just like you said.
wavelength = []
flag = 0  # 1 while the current line is inside the Wavelength block
# The context manager closes the file once the loop is done.
with open('wavefile.txt', 'r') as wavefile:
    for line in wavefile:
        if '}' in line:
            flag = 0
        if 'Wavelength' in line:
            flag = 1
            continue  # the marker line itself is not data
        if flag == 1:
            wavelength.append(line.strip())
print(wavelength)

Referenced variable isn't recognized by python

I am developing a program which works with a ; separated csv.
When I try to execute the following code
def accomodate(fil, targets):
    """Select the wanted columns from ';'-separated rows.

    Args:
        fil: Iterable of row strings whose fields are separated by ';'.
        targets: Collection of zero-based column indexes to keep.

    Returns:
        One list per row holding the fields whose index is in *targets*,
        in their original column order.
    """
    ret = []
    for row in fil:
        fields = row.split(";")
        ret.append([field for pos, field in enumerate(fields) if pos in targets])
    return ret
Here 'fil' is the list of rows read from the csv file and 'targets' is a list containing the columns to be chosen. When the split is applied to the csv rows, it raises the following error: "'l' name is not defined", although as far as I can see the 'l' variable has already been defined.
Does anyone know why this happens? Thanks beforehand
edit
As many of you have requested, I shall provide with an example.
I shall post an example of csv, not a shard of the original one. It comes already listed
k = ["Cookies;Brioche;Pudding;Pie","Dog;Cat;Bird;Fish","Boat;Car;Plane;Skate"]
accomodate(k, [1,2]) = [[Brioche, Pudding], [Cat, Bird], [Car, Plane]]
You should copy the content of fil list:
l = fil.copy()

how to multiprocess large text files in python?

I tried to digest lines of a DictReader object after I read in a 60 MB csv file. I asked the question here: how to chunk a csv (dict)reader object in python 3.2?. (Code repeated below.)
However, now I realize that chunking up the original text file might as well do the trick (and do the DictRead and the line-by-line digest later on). However, I found no io tool that multiprocessing.Pool could use.
Thanks for any thoughts!
source = open('/scratch/data.txt','r')
def csv2nodes(r):
    """Convert occupation rows into edge records plus the set of person ids.

    Each row must provide 'cell', 'seq_ei', 'dat_deb_occupation' and
    'dat_fin_occupation'; the two dates are parsed as day/month/year and
    converted with time.mktime.

    Returns:
        A tuple ``(edges, people)``: ``edges`` holds one single-item list
        ``[(person, cell, {1: start, 2: end})]`` per row, ``people`` is the
        set of all 'seq_ei' values as integers.
    """
    date_format = '%d/%m/%Y'
    edges = []
    people = set()
    for row in r:
        cell = int(row['cell'])
        person = int(row['seq_ei'])
        start = time.mktime(time.strptime(row['dat_deb_occupation'], date_format))
        end = time.mktime(time.strptime(row['dat_fin_occupation'], date_format))
        # Integer keys 1 (start) and 2 (end) are used for the attributes.
        edges.append([(person, cell, {1: start, 2: end})])
        people.add(person)
    return (edges, people)
def csv2graph(source):
# Read `source` with csv.DictReader, hand chunks of rows to csv2nodes() via a
# 4-process Pool, and add the collected edges to a networkx MultiGraph.
# Returns the tuple (MG, ppl).
# NOTE(review): the indentation of this listing was lost.
r = csv.DictReader(source,delimiter=',')
MG=nx.MultiGraph()
l = []
ppl = set()
# Remember that I use integers for edge attributes, to save space! Dic above.
# start: 1
# end: 2
p = Pool(processes=4)
node_divisor = len(p._pool)*4
# NOTE(review): csv.DictReader objects do not support len(), and `chunks` is
# not defined in this block.
node_chunks = list(chunks(r,int(len(r)/int(node_divisor))))
num_chunks = len(node_chunks)
# NOTE(review): zip() over a single iterable yields 1-tuples, so csv2nodes
# receives `(chunk,)` rather than `chunk`.
pedgelists = p.map(csv2nodes,
zip(node_chunks))
ll = []
# Each result is the (edge list, id set) pair returned by csv2nodes; the loop
# variable rebinds `l` from above.
for l in pedgelists:
ll.append(l[0])
ppl.update(l[1])
MG.add_edges_from(ll)
return (MG,ppl)

Resources