Adding binary header - python-3.x

I have a binary data file I would like to append a header to using python. Below is the code I have to create the header but I am unsure on how to add it to the test.dat file.
import struct
import os
from struct import *
# Build a small binary header for the data file from individually packed fields.
date = 20151027
version = 1
datatype = b'P'
indextype = b'I'
# Record count: the file size divided into 16-byte records (integer division).
recct = os.path.getsize("H:\\test\\test.dat") // 16
delim = b' '
filler = b' '
delta = 'F'  # defined here but not packed into the header below

# Every field is packed by its own pack() call and the pieces are joined afterwards.
pdate = pack('l', date)
pversion = pack('h', version)
pdatatype = pack('>s', datatype)
pindextype = pack('>s', indextype)
precct = pack('l', recct)
pdelim = pack('s', delim)
# '2s' is given a 1-byte value here, so the packed field is wider than the value.
pfiller = pack('<2s', filler)

# Join with '+' throughout: separating the last fields with commas would make
# `header` a tuple instead of a single bytes object.
header = pdate + pversion + pdatatype + pindextype + precct + pdelim + pfiller

Read the file in, then write the file out with the header. Be sure to use binary mode:
# Load the current contents as raw bytes (binary mode, as the answer requires) ...
with open(r'H:\test\test.dat', 'rb') as src:
    data = src.read()

# ... then overwrite the same file with the header followed by those bytes.
with open(r'H:\test\test.dat', 'wb') as dst:
    dst.write(header + data)
Also, you can pack in one statement:
# Pack all header fields with one format string instead of one pack() call per field.
# NOTE(review): the format has no byte-order prefix, so struct applies native sizes and
# alignment; the bytes may differ from concatenating separately packed fields — confirm.
header = struct.pack('lhssls2s',date,version,datatype,indextype,recct,delim,filler)
str.encode('P') is an odd way of saying 'P'.encode() or just b'P'.

Related

Choose encoding when converting to Sqlite database

I am converting Mbox files to an SQLite database, but I cannot get the database file to be encoded as UTF-8.
The Python console displays the following message when converting to db:
Error binding parameter 1 - probably unsupported type.
When I visualize my data on DB Browser for SQlite, special characters don't appear and the � symbol shows up instead.
I first convert .text files to Mbox files with the following function:
def makeMBox(fIn,fOut):
    """Copy a text archive to a new mbox-style file.

    Lines that start with "From " are treated as message separators: a blank
    line is written before every separator except the first, and each " at "
    on the separator line is replaced with "#". All other lines are copied
    unchanged.

    Args:
        fIn: Path of the existing text file to read. It is decoded as UTF-8
            with undecodable bytes replaced.
        fOut: Path of the file to create; it must not exist yet.

    Returns:
        False when ``fIn`` does not exist or ``fOut`` already exists (nothing
        is written in either case); True after the output has been written.
    """
    if not os.path.exists(fIn) or os.path.exists(fOut):
        return False

    # Encoding detection is disabled; the input is read as UTF-8.
    fInCodec = 'UTF-8'
    seen_separator = False

    # Both files are managed by `with` so they are closed even if copying
    # fails. The output uses the same codec as the input so the text is not
    # transcoded to the platform's default encoding on the way out.
    # NOTE(review): the nesting of the separator handling was reconstructed
    # from a flattened listing — confirm it matches the intended behaviour.
    with open(fIn, 'rt', encoding=fInCodec, errors="replace") as src, \
            open(fOut, "w", encoding=fInCodec) as out:
        for line in src:
            if line.startswith("From "):
                if seen_separator:
                    out.write("\n")
                seen_separator = True
                line = line.replace(" at ", "#")
            out.write(line)
    return True
Then, I convert to sqlite db:
for k in dates:
    # Each entry of ``dates`` names one mbox file; every file is upserted into
    # the same database path.
    db = sqlite_utils.Database("Courriels_Sqlite/Echanges_Discussion.db")
    mbox = mailbox.mbox("Courriels_MBox/"+k+".mbox")

    def to_insert():
        """Yield one dict per message: its header items plus a "payload" entry."""
        for message in mbox.values():
            yield {**dict(message.items()), "payload": message.get_payload()}

    try:
        db["Dionyversite"].upsert_all(to_insert(), alter = True, pk = "Message-ID")
    except sql.InterfaceError as e:
        # Report the binding error and carry on with the next file.
        print(e)
Thank you for your help.
I found how to fix it:
def to_insert():
    """Yield one dict per message in ``mbox``: header items plus the payload.

    The payload is fetched with ``decode = True``.
    """
    for message in mbox.values():
        yield {**dict(message.items()), "payload": message.get_payload(decode = True)}
``
As you can see, I added `decode = True` to the `get_payload` call inside the `to_insert` function.

Why is only half my data being passed into my dictionary?

When I run this script I can verify that it loops through all of the values, but not all of them get passed into my dictionary
# Scan every page of the PDF for lines that begin with "Jan <digits>" and store
# them in a dict keyed by the matched date text. A later line with the same key
# replaces the earlier entry.
file = open('path', 'rb')
readFile = PyPDF2.PdfFileReader(file)
lineData = {}
totalPages = readFile.numPages
newTrans = re.compile(r'Jan \d{2,}')
for i in range(totalPages):
    pageObj = readFile.getPage(i)
    pageText = pageObj.extractText
    for line in pageText(pageObj).split('\n'):
        if not newTrans.match(line):
            continue
        # Value: the line with the date text cut out; key: the date text itself.
        newValueStr = ' '.join(re.split(r'Jan \d{2,}', line))
        newKeyStr = ' '.join(newTrans.findall(line))
        print(f"{newKeyStr}{newValueStr}")
        lineData[newKeyStr] = newValueStr
print(len(lineData))
There are 80+ data pairs but when I run this the dict only gets 37
Duplicate keys, maybe? A dict keeps only one value per key, so a repeated date overwrites the earlier entry. Try making lineData = [] and appending to it instead: lineData.append({newKeyStr: newValueStr}), then check how many records you get.

How to fix unidentified character problem while passing data from TKinter to Photoshop via Python script?

I made a GUI Application which looks like this:
The ones marked red are Tkinter Text widgets and the ones marked yellow are Tkinter Entry widgets
After taking user input, the data is added to a PSD file and then rendered as an image. But let's say the following data is entered as input:
It renders the following Photoshop file:
How do I fix this? Photoshop does not recognize "\n" properly, so the rendered document is useless.
Here is the code which deals with converting of the accepted user data into strings and then adding it to Photoshop template and then rendering it:
def _photoshop_text(text):
    # Tk Text widgets return "\n" line breaks plus one trailing "\n";
    # Photoshop text layers break lines on "\r" instead, so convert them.
    return text.rstrip("\n").replace("\n", "\r")


def _set_layer_text(doc, layer_name, text):
    # Write `text` into the text item of the art layer called `layer_name`.
    doc.ArtLayers[layer_name].TextItem.contents = text


def DataAdder2CSV():
    """Save the form to Patient_Data.csv, then render it via the Photoshop template.

    Reads the module-level input widgets, appends one row to the CSV file,
    asks the user for a destination folder, fills the text layers of the PSD
    template and exports the result as a PNG file. A message box is shown
    after the CSV write and again after the export.

    Multi-line fields are converted from line-feed to carriage-return line
    breaks before being sent to Photoshop; the CSV row keeps the text exactly
    as the widgets returned it.
    """
    serial_no = eSNO.get()
    name = ename.get()
    patient_id = ePID.get()
    age = eage.get()
    date = edate.get()
    gender = egender.get()
    contact = econtact.get()
    # Multi-line widgets: read from line 1, column 0 through END.
    complaint = ecomp.get(1.0, END)
    allergy = eallergy.get(1.0, END)
    history = ehistory.get(1.0, END)
    rx = eR.get(1.0, END)

    row = [serial_no, name, patient_id, age, date, gender, contact,
           complaint, allergy, history, rx]
    # `with` guarantees the CSV file is closed even if the write fails.
    with open("Patient_Data.csv", "a", newline="") as csv_file:
        csv.writer(csv_file, delimiter=",").writerow(row)
    messagebox.showinfo("Prescription Generator", "Data has been saved to the database successfully!")

    # Imported here, as in the original, so the Windows-only COM dependency is
    # needed only when a prescription is actually rendered.
    import win32com.client
    from tkinter import filedialog

    shell = win32com.client.Dispatch("WScript.Shell")
    user_docs = shell.SpecialFolders("MyDocuments")
    export_dir = filedialog.askdirectory(initialdir=user_docs, title="Choose Destination Folder")

    psApp = win32com.client.Dispatch("Photoshop.Application")
    # Raw string: the backslashes in the Windows path are literal characters.
    psApp.Open(r"D:\Coding\Python Scripts\Dr Nikhil Prescription App\Prescription Generator\Presc_Template.psd")
    doc = psApp.Application.ActiveDocument

    # Single-line fields go to their layers unchanged.
    _set_layer_text(doc, "name", name)
    _set_layer_text(doc, "age", age)
    _set_layer_text(doc, "gender", gender)
    _set_layer_text(doc, "pid", patient_id)
    _set_layer_text(doc, "date", date)
    _set_layer_text(doc, "contact", contact)
    # Multi-line fields: complaint and history keep their leading space.
    _set_layer_text(doc, "complaint", " " + _photoshop_text(complaint))
    _set_layer_text(doc, "allergy", _photoshop_text(allergy))
    _set_layer_text(doc, "history", " " + _photoshop_text(history))
    _set_layer_text(doc, "R", _photoshop_text(rx))

    options = win32com.client.Dispatch('Photoshop.ExportOptionsSaveForWeb')
    # NOTE(review): 13 and ExportAs=2 are presumably the PNG / save-for-web
    # constants of the Photoshop COM API — confirm against its reference.
    options.Format = 13
    options.PNG8 = False
    pngfile = export_dir + f"/{patient_id}-{name}_({date}).png"
    doc.Export(ExportIn=pngfile, ExportAs=2, Options=options)
    messagebox.showinfo("Prescription Generator", "Prescription has been saved in the desired location successfully!")
There are 3 ways of expressing new line characters:
Classic Mac OS (before OS X) used \r; modern macOS uses \n
Linux uses \n
Windows uses \r\n
Python and tkinter use \n but it looks like psApp.Application uses \r instead. That is why the document isn't rendered properly. For more info read the answers to this question.

Remove .0 from a string in python

I'm reading a file called MissingItems.txt, which contains a list of bar codes and looks like this:
[3000000.0, 5000000.0, 6000000.0, 7000000.0, 8000000.0, 1234567.0, 1234568.0, 9876543.0, 3000001.0, 5000001.0, 6000001.0, 7000001.0, 8000001.0, 1234561.0, 1234561.0, 9876541.0, 6000002.0, 7000002.0, 8000002.0, 1234562.0, 1234562.0, 9876542.0,9876543.0,9876544.0]
I have replaced the square brackets and then split the line as below
# Read the bar-code list from MissingItems.txt, strip the square brackets and
# split the remaining text on commas.
OpenFile = open(r"G:MissingItems.txt","r")
for line in OpenFile:
remove = line.replace('[','')
remove1 = remove.replace(']','')
# Each element is still a string such as '3000000.0'; elements that followed
# ", " in the file keep their leading space.
plates = remove1.split(",")
# One variable per list position (the sample line holds 24 entries).
Plate1 = plates[0]
Plate2 = plates[1]
Plate3 = plates[2]
Plate4 = plates[3]
Plate5 = plates[4]
Plate6 = plates[5]
Plate7 = plates[6]
Plate8 = plates[7]
Plate9 = plates[8]
Plate10 = plates[9]
Plate11 = plates[10]
Plate12 = plates[11]
Plate13 = plates[12]
Plate14 = plates[13]
Plate15 = plates[14]
Plate16 = plates[15]
Plate17 = plates[16]
Plate18 = plates[17]
Plate19 = plates[18]
Plate20 = plates[19]
Plate21 = plates[20]
Plate22 = plates[21]
Plate23 = plates[22]
Plate24 = plates[23]
Is there a way to remove the .0 from the bar codes, preferably before splitting? I would like to get '3000000' rather than '3000000.0'. I've tried to use replace, but I'm not sure how to make it match only the .0 at the end of each bar code.
This is one approach using ast.literal_eval and int.
Ex:
import ast
with open(r"G:MissingItems.txt","r") as src:
    for raw_line in src:
        # literal_eval parses the bracketed list; int() is applied to each element.
        plates = list(map(int, ast.literal_eval(raw_line.strip())))
        print(plates)
# --> [3000000, 5000000, 6000000, 7000000, 8000000, 1234567, 1234568, 9876543, 3000001, 5000001, 6000001, 7000001, 8000001, 1234561, 1234561, 9876541, 6000002, 7000002, 8000002, 1234562, 1234562, 9876542, 9876543, 9876544]
Your file seems to have JSON formatted lines, so you could use a JSON parser:
import json
with open(r"G:MissingItems.txt","r") as src:
    for raw_line in src:
        # Each line is parsed as one JSON document and printed.
        plate = json.loads(raw_line)
        print(plate)
This makes plate a list of numbers (not strings), so the difference between 3000.0 and 3000 disappears (as they are representations of the same number). It is only when you would need to output them in a decimal representation that you would worry about the number of decimals to output.
Secondly, it is bad practice to create separate variables for plate1 plate2, ... In such a scenario you should work with a list, and access the values with plate[0], plate[1], ...

Python add ', ' to string and return:

# Read every line of nom_fichier, keep the lines that pass the filter below,
# and return them with the newline characters removed.
fd = open(nom_fichier, 'r')
liste_chaine = fd.readlines()
liste_chaine2 = []
for item in liste_chaine:
# NOTE(review): the right-hand side is one long string, so `not in` is a
# substring test, not a membership test against a list of words — confirm
# this is intended.
if item not in "'noir\n','blanc\n','Humain\n', 'Ordinateur\n', 'False\n', 'True\n":
liste_chaine2.append(item)
# Remove the newline characters from the lines that were kept.
liste_chaine2 = [i.replace('\n', '') for i in liste_chaine2]
return liste_chaine2
['3,3,blanc', '3,4,noir', '4,3,noir', '4,4,blanc']
I am reading a file and trying to return a string formatted exactly like this:
3,3,blanc
4,3,noir
3,4,white
I cleaned the file with the code above, but I still need to turn this list into the required output.
You can split your string and put it together again to meet your requirements:
# Each whitespace-separated token becomes "<first char>,<second char>,<rest>",
# and the converted tokens are joined one per line.
string = '33blanc 34noir 43noir 44blanche'
result = '\n'.join(f'{v[0]},{v[1]},{v[2:]}' for v in string.split())
print(result)
3,3,blanc
3,4,noir
4,3,noir
4,4,blanche

Resources