Python scraping regex (word just next to the number)

Python scraping regex (word just next to the number) - python-3.x

I hope you're well. I'd like to scrape different data with regex :)
# Retrieve the ingredients: one cleaned-up string per <li> of the recipe list.
try:
    items = soup.find_all("li", {"class": "recipe-ingredients__list__item"})
    ingredients = [item.text.replace("\n", "").strip() for item in items]
except Exception:
    # Any failure while scraping leaves the ingredient list unset.
    ingredients = None
Here is the json result
"ingredients": [
"250g de porc h\u00e2ch\u00e9 (le filet mignon c'est vraiment bon)",
"1 oignon blanc",
"1 carotte",
"6 champignons parfum\u00e9s chinois (pas des champignons noirs)",
"1poign\u00e9e de vermicelles de riz (cheveux d'ange)",
"1poign\u00e9e de germes de soja",
"3 oeufs",
"2gousses d'ail",
"Galette de riz vietnamiennes (les grandes)",
"4cuill\u00e8res \u00e0 soupe de nuoc mam",
"Poivre"
Do you know how I can scrape the following separately:
the quantity (here is the number)
the unit or quantifier (which always sticks to the number when it exists)
the name of the ingredient
I do not find how to do it with regex
Thanks for your response @Ryszard Czech :) It's the first time I have used regex. If I want to save the separated data directly in JSON:
what should be the code something like that?
# Retrieve the ingredients
# NOTE(review): this attempt is not valid Python. The bracketed expression is
# passed as a third positional argument to find_all and has a stray comma
# before `for`; it also refers to `ingredients` while that name is still being
# assigned. re.compile(pattern, x) would treat x as the flags argument rather
# than as the text to search.
try:
ingredients = [item.text.replace("\n", "").strip() for item in soup.find_all("li", {"class": "recipe-ingredients__list__item"}, [re.compile(r'^(?:(\d+)([^\W\d_]*))?(.*)', x), for x in ingredients])]
except Exception as e:
ingredients = None
Or do I need to use some pattern to apply to ingredients

Use
import json, re
# Sample payload: the JSON document produced by the scraper.
j = """{"ingredients": [
"250g de porc h\u00e2ch\u00e9 (le filet mignon c'est vraiment bon)",
"1 oignon blanc",
"1 carotte",
"6 champignons parfum\u00e9s chinois (pas des champignons noirs)",
"1poign\u00e9e de vermicelles de riz (cheveux d'ange)",
"1poign\u00e9e de germes de soja",
"3 oeufs",
"2gousses d'ail",
"Galette de riz vietnamiennes (les grandes)",
"4cuill\u00e8res \u00e0 soupe de nuoc mam",
"Poivre"]}"""
jsObj = json.loads(j)

# Optional leading quantity (digits) with an optional unit glued to it
# (letters only), followed by the remainder of the line.
ingredient_pattern = re.compile(r'^(?:(\d+)([^\W\d_]*))?(.*)')
parsed = [ingredient_pattern.findall(entry) for entry in jsObj["ingredients"]]
print(parsed)
Output:
[[('250', 'g', " de porc hâché (le filet mignon c'est vraiment bon)")], [('1', '', ' oignon blanc')], [('1', '', ' carotte')], [('6', '', ' champignons parfumés chinois (pas des champignons noirs)')], [('1', 'poignée', " de vermicelles de riz (cheveux d'ange)")], [('1', 'poignée', ' de germes de soja')], [('3', '', ' oeufs')], [('2', 'gousses', " d'ail")], [('', '', 'Galette de riz vietnamiennes (les grandes)')], [('4', 'cuillères', ' à soupe de nuoc mam')], [('', '', 'Poivre')]]
The ^(?:(\d+)([^\W\d_]*))?(.*) expression optionally matches one or more digits (capture 1) followed by optional letters (capture 2), and then captures the rest of the string into capture 3.

Related

Python Code only running in Debugging mode

So I've been working on a script that a co-worker of mine made. I have fixed some of its issues, but I cannot figure out why it only works when I run it in debugging mode in VS Code; when I run it from a normal Python shell it does not produce the output files that it produces in debug mode. Does anyone know why? (Some links and sensitive company data have been removed.)
here is the code:
import requests
from requests.auth import HTTPBasicAuth
import json
import csv
import os
import pandas as pd
import datetime
import urllib3
from datetime import datetime, timedelta
#______________________________________________________________________________________
# main function
# Downloads the Rapid7 export, converts the AD computer list from JSON to CSV,
# writes the AD rows that do not appear in the Rapid7 list, and splits those
# rows into two CSV files by LastLogon date.
# NOTE(review): every file except the AD input is opened by a relative name,
# so the files are read and written in the current working directory.
def Main():
# HTTP request, using the API account, to the Rapid7 export list
urllib3.disable_warnings() # ignore the warnings about the self-signed certificates
url = "URL REMOVED"
# NOTE(review): the second string literal on the next line has no closing quote.
r= requests.get(url,verify=False, auth=HTTPBasicAuth('REMOVED', 'REMOVED))
# save the data from the HTTP request in CSV format
with open('downloaded.csv', 'wb') as csv_file:
csv_file.write(r.content)
# open the AD input file
Filenameslist = "C:\Robert-Code\ComputerListForRapid7.json" # full path to the file added
with open(Filenameslist) as f:
data = json.load(f)
# convert the JSON to a CSV file
with open("computerlist.csv", "w") as f:
fieldnames = data[3].keys()
# take the keys from the 3rd row, because sometimes the first row of the source file is empty
# NOTE(review): data[3] is the fourth element of the list, not the third.
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
for row in data:
writer.writerow(row)
# I use the Pandas module to get only the "name" column of the Rapid7 file, which is the one I need.
# Then I swap name and address so that the name is in the first column. For this I read the CSV into a dataframe
dfR7 = pd.read_csv("downloaded.csv")
titles = list(dfR7.columns)
titles[0],titles[1] = titles[1],titles[0] # swap, so that name is in the first column
dfR7 = dfR7[titles] # put the columns and data in the right order in the object
dfR7.sort_values(["Name"], inplace = True)
dfR7.drop(columns=["Address","Operating System","Site","Exploits","Malware","Vulnerabilities","Risk","Last Scan","Assessed"], inplace=True)
dfR7["Name"] = dfR7["Name"].str.split('.').str[0] # strip the domain from the FQDN
dfR7["Name"] = dfR7["Name"].str.lower() # everything lowercase
# Pandas module again, to keep a single name column from the AD file in the object dfAD so that I can compare later.
dfAD = pd.read_csv("computerlist.csv")
dfAD.drop(columns=["DNSHostName","OperatingSystem","IPAddress", "LastLogon"], inplace= True)
dfAD["Computer"] = dfAD["Computer"].str.lower()
# save both objects to a CSV file in order to compare them
dfR7.to_csv("fr7.csv", index=False)
dfAD.to_csv("fAD.csv", index=False)
with open('fr7.csv', 'r') as t1, open('fAD.csv', 'r') as t2:
fileRapid = t1.readlines()
fileAD = t2.readlines()
# compare the files fr7.csv and fAD.csv using a for loop
# and save the result in update.csv
with open('update.csv', 'w') as outFile:
for line in fileAD:
if line not in fileRapid:
outFile.write(line)
# here I bring back the old AD file in order to merge it with the update.csv file that was just created
# so that I have all the useful columns again
dfAD = pd.read_csv("computerlist.csv")
dfAD["Computer"] = dfAD["Computer"].str.lower()
dfAD.to_csv("f1AD.csv", index=False)
# merge function of the Pandas module
data1 = pd.read_csv('update.csv')
data2 = pd.read_csv("f1AD.csv")
output1 = pd.merge(data1, data2,
on='Computer',
how='inner')
# save the merged result (the original comment named TotalresultsAD.csv; the code writes totaldifferenceAD_R7.csv)
output1.to_csv("totaldifferenceAD_R7.csv", index =False)
# with the datetime module I create a variable: time, holding today's date minus 30 days
# NOTE(review): the comments say 30 days, but timedelta(60) and the output file names use 60 days.
time = datetime.today() - timedelta(60)
"lees 2 x het bestand in"
dfgood = pd.read_csv("totaldifferenceAD_R7.csv")
dfbad = pd.read_csv("totaldifferenceAD_R7.csv")
# this output file lists the assets whose LastLogon is more recent than 30 days ago
dfgood['LastLogon'] = pd.to_datetime(dfgood['LastLogon'], errors = 'coerce') # errors = 'coerce' ensures that invalid values in the LastLogon column are ignored
dfgood.sort_values(["LastLogon"], inplace = True)
dfnew = (dfgood['LastLogon'] >= time)
dfnew = dfgood.loc[dfnew]
# this output file lists the assets whose LastLogon is older than 30 days ago
dfbad['LastLogon'] = pd.to_datetime(dfbad['LastLogon'], errors = 'coerce') # errors = 'coerce' ensures that invalid values in the LastLogon column are ignored
dfbad.sort_values(["LastLogon"], inplace = True)
newdf2 = (dfbad['LastLogon'] < time)
newdf2 = dfbad.loc[newdf2]
# write out the final files
dfnew.to_csv("newer_than_60_days.csv",index =False)
newdf2.to_csv("older_than_60_days.csv",index =False)
# clean up the intermediate files
# NOTE(review): the file was written as "fAD.csv" but is removed as "FAD.csv";
# the names differ in case.
os.remove("FAD.csv")
os.remove("fr7.csv")
os.remove("computerlist.csv")
os.remove("downloaded.csv")
os.remove("f1AD.csv")
os.remove("update.csv")
if __name__=="__main__":
Main() ```
Thanks in advance for any help

Because I don't have a high enough SO reputation, unfortunately I can't simply comment this and need to make it an 'Answer'.
Changing
r= requests.get(url,verify=False, auth=HTTPBasicAuth('REMOVED', 'REMOVED))
to
r= requests.get(url,verify=False, auth=HTTPBasicAuth('REMOVED', 'REMOVED'))
will get the syntax highlighting all fixed up and may make it easier for someone smarter than me to assist you :)
Something that I've previously come across (primarily with web scraping packages) were functions that didn't play nicely with relative paths - perhaps changing them to absolute paths using os.path.abspath(".....") may help? It's a stab in the dark so that this 'Answer' actually has a potentially useful element to it, but may be an adjustment worth exploring.

Use of dictionaries with lists in Python

How can I look for similar keys in a Python dictionary and that its values are saved in a single key by using a list? That is, for example, the user enters a name and two different phone numbers; I'd like to store those two telephone numbers in the same key.
# Asker's attempt: read three name/number pairs and try to group the numbers
# per name.
agenda = {}
for i in range(3):
nombre = input("Ingrese el nombre de usuario: ")
numero = int(input("Ingrese el numero de telefono del usuario: "))
# NOTE(review): plain assignment replaces any number already stored for nombre.
agenda[nombre] = numero
for key in agenda.keys():
# NOTE(review): key comes from agenda.keys(), so this test is always true.
if key in agenda.keys():
lista = []
# NOTE(review): list.append returns None, so None is what gets stored here.
agenda[key] = lista.append(numero)
print(agenda)
I tried it this way, but if a name is repeated the dictionary only takes it into account once. Thank you very much.

I think you mean this
# Phone book: each name maps to the list of numbers entered for it.
agenda = {}
for _ in range(3):
    nombre = input("Ingrese el nombre de usuario: ")
    numero = int(input("Ingrese el numero de telefono del usuario: "))
    # setdefault creates the list the first time a name is seen.
    agenda.setdefault(nombre, []).append(numero)
Example run:
>>> Ingrese el nombre de usuario: Pablo
>>> Ingrese el numero de telefono del usuario: 123
>>> Ingrese el nombre de usuario: Pablo
>>> Ingrese el numero de telefono del usuario: 456
>>> Ingrese el nombre de usuario: Camilo
>>> Ingrese el numero de telefono del usuario: 321
Now, if you print agenda, you get
>>> agenda
{'Pablo': [123, 456], 'Camilo': [321]}

As a complement to @Camilo's answer, you could make use of the defaultdict class from the collections module (https://docs.python.org/3/library/collections.html#collections.defaultdict) to create a list by default. Additionally, you could check for repeated phone numbers.
from collections import defaultdict
# Phone book backed by defaultdict: a missing name starts with an empty list.
agenda = defaultdict(list)
for _ in range(3):
    nombre = input("Ingrese el nombre de usuario: ")
    numero = int(input("Ingrese el numero de telefono del usuario: "))
    telefonos = agenda[nombre]
    # Skip numbers that are already recorded for this name.
    if numero in telefonos:
        continue
    telefonos.append(numero)

cleaning multi terms from stopwords

I have a list of expressions, and I need to remove from these expressions the stopwords.
ex = ["andare con i piedi di piombo", "avere gli occhi foderati di prosciutto", 'non chiudere occhio', 'con le mani nel sacco']
stopwords = ["ad","al", "allo", "ai","agli", "all", "alla", "col", "in", "il", "della", "un", "con", "non", "i", "di", "le", "nei", "gli"]
I tried this
# Asker's attempt at removing the stopwords from each expression.
for es in ex:
# NOTE(review): new_ex is assigned here but never used afterwards.
new_ex = ''
for word in stopwords:
# NOTE(review): every pass starts again from the unchanged es, so only the
# replacement for the last stopword is still in new_es when it is printed.
new_es = es.replace(" " +word+ " ", "")
print(new_es)
The above code does not remove the stopwords
Can someone help?

using your example
ex = ["andare con i piedi di piombo", "avere gli occhi foderati di prosciutto", 'non chiudere occhio', 'con le mani nel sacco']
stopwords = ["ad","al", "allo", "ai","agli", "all", "alla", "col", "in", "il", "della", "un", "con", "non", "i", "di", "le", "nei", "gli"]
you could go with:
# Split each expression into words, drop the stopwords and print what is left.
# Every kept word is followed by a single space.
for es in ex:
    kept = [word for word in es.split() if word not in stopwords]
    new_es = ''.join(word + ' ' for word in kept)
    print(new_es)

This will do the job:
# Expressions to clean.
sentences = [
    'andare con i piedi di piombo', 'avere gli occhi foderati di prosciutto',
    'non chiudere occhio', 'con le mani nel sacco',
]
# Stopwords, applied in this order.
words = [
    'ad', 'al', 'allo', 'ai', 'agli', 'all', 'alla', 'col', 'in', 'il',
    'della', 'un', 'con', 'non', 'i', 'di', 'le', 'nei', 'gli',
]


def _drop_padded(text, vocabulary):
    """Remove every occurrence of each word that has a space on both sides."""
    for entry in vocabulary:
        text = text.replace(f' {entry} ', '')
    return text


for sentence in sentences:
    s = _drop_padded(sentence, words)
    print(s)
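The problem in your code is that you need to set new_es = es before the inner loop and then call replace() on new_es, so that each replacement builds on the previous one instead of starting again from the original string.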
Here is the output of the code above:
andarei piedipiombo
avereocchi foderatiprosciutto
non chiudere occhio
conmani nel sacco
Also note that 'non chiudere occhio' remains the same because you are looking for a word padded by spaces.

QPrinter margins under Py3 and PyQt5

I am updating my program under Python3 and PyQt5, I use QPrinter to associate different small texts (and also images) in a single PDF document. The Python2 and PyQt4 version of the program works well, but by passing it under PyQt5 (and QPrinter) I have a problem with the margins (they are much too wide afterwards); it seems that setPageMargins is not working well (at least not as well as in QPrinter's PyQt4 version). I wrote a little script inspired by the one present in my program:
#!/usr/bin/python
#-*- coding: utf-8 -*-
import sys, os
from PyQt5.QtPrintSupport import QPrinter
from PyQt5.QtGui import QTextDocument
from PyQt5.QtWidgets import QApplication
# Builds an HTML pupil worksheet from the constructor arguments, loads it into
# a QTextDocument and prints it to "test.pdf" through a QPrinter.
# NOTE(review): the class is itself a QPrinter, but the printing below goes
# through a second QPrinter stored in self.printer.
class Html_vers_pdf_fiche_eleve(QPrinter):
def __init__(self, niv_de_classe, nbre_seances, titre_prog, num_seq, incitation, demande, vocabulaire, questions, texte_ref_art_1, texte_ref_art_2, texte_ref_art_3, vignette_img_src_1, vignette_img_src_2, vignette_img_src_3, marge_gauche, marge_haute, marge_droite, marge_basse, police_normaux_caracteres, police_petits_caracteres):
super(Html_vers_pdf_fiche_eleve, self).__init__(QPrinter.HighResolution)
# Section headings ...
# NOTE(review): variable_012 and variable_015 to variable_018 are assigned but
# not used in the code shown here.
variable_001 = "Classe :"
variable_002 = "Date :"
variable_003 = "Durée :"
variable_004 = "Nom :"
variable_005 = "Prénom :"
variable_006 = "Titre de la progression :"
variable_007 = "Séquence n° :"
variable_008 = "Incitation :"
variable_009 = "Demande :"
variable_010 = "Vocabulaire :"
variable_011 = "Questions (répondre derrière cette feuille) EVENTUELLEMENT ECRIRE UN COMMENTAIRE :"
variable_012 = ''
variable_014 = "Travaux d'artistes (références) :"
variable_015 = "Les élèves, évaluez vontre travail en utilisant les smileys juste en dessous de Eval Elève"
variable_016 = "Eval Elève"
variable_017 = "Evaluation Professeur"
variable_018 = "Ci-dessous se trouvent les critères d'évaluation des compétences travaillées en arts plastiques"
# HTML: write the header
html_entete = '''<html><head><title></title>'''+'''\n'''+'''<style type="text/css">'''+'''\n'''+'''table { border-style:solid;border-width:1.3px;border-color:#3c3c3c;font-size:'''+str(police_normaux_caracteres)+'''pt;font-family: "DejaVu Sans, sans-serif" }'''+'''\n'''+'''</style>'''+'''\n'''+'''</head>'''+'''\n'''+'''<body>'''+'''\n'''
# HTML: write the class level, the date and the duration
html_tab_ligne_0 = '''<table width="100%" cellpadding="3"
cellspacing="0.5">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="22%"><p><b>'''+str(variable_001)+''' '''+str(niv_de_classe)+'''ème</b></p></td>'''+'''\n'''+'''<td width="35%"><p><b>'''+str(variable_002)+'''</b></p></td>'''+'''\n'''+'''<td width="43%"><p><b>'''+str(variable_003)+''' '''+nbre_seances+'''</b></p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the last name and the first name
html_tab_ligne_1 = '''<table width="100%" cellpadding="3"
cellspacing="0.5">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="50%"><p><b>'''+str(variable_004)+'''</b></p></td>'''+'''\n'''+'''<td width="50%"><p><b>'''+str(variable_005)+'''</b></p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the title of the progression and the sequence number
html_tab_ligne_2 = '''<table width="100%" cellpadding="3"
cellspacing="0.6">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="83%"><p><b><u>'''+str(variable_006)+'''</u>'''+''' '''+titre_prog+'''</b></p></td>'''+'''\n'''+'''<td width="17%"><p><u><b>'''+str(variable_007)+'''</u>'''+''' <b>'''+str(num_seq)+'''</b></p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the prompt ("incitation")
html_tab_ligne_3 = '''<table width="100%" cellpadding="3"
cellspacing="0.5">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="100%" bgcolor="#e1dede"><p><b><u>'''+str(variable_008)+'''</u>'''+''' '''+incitation+'''</b></p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the assignment ("demande")
html_tab_ligne_4 = '''<table width="100%" cellpadding="3"
cellspacing="0.6">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="100%" bgcolor="#e1dede"><p><b><u>'''+str(variable_009)+'''</u></b>'''+''' '''+demande+'''</p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the header of the vocabulary word list
html_tab_ligne_5 = '''<table width="100%" cellpadding="3"
cellspacing="0.5" bgcolor="#cccccc">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="100%"><p><b><u>'''+str(variable_010)+'''</u></b></p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the vocabulary word list
html_tab_ligne_6 = '''<table width="100%" cellpadding="3"
cellspacing="0.5">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="100%"><p>'''+vocabulaire+'''</p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the header of the questions asked to the pupils
html_tab_ligne_7 = '''<table width="100%" cellpadding="3"
cellspacing="0.5" bgcolor="#cccccc">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="100%"><p><u><b>'''+str(variable_011)+'''</b></u></p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the questions asked to the pupils
html_tab_ligne_8 = '''<table width="100%" cellpadding="3"
cellspacing="0.5">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="100%"><p><b>'''+questions+'''</b></p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the header for the artists' works (references)
html_tab_ligne_9 = '''<table width="100%" cellpadding="3"
cellspacing="0.5" bgcolor="#cccccc">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="100%"><p><u><b>'''+str(variable_014)+'''</b></u></p></td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
#html_tab_ligne_9 = html_tab_ligne_9.decode('utf-8')
# HTML: write the artist's name, the work, a summary and the matching thumbnails
html_tab_ligne_10 = '''<table width="100%" cellpadding="3"
cellspacing="0.5">'''+'''\n'''+'''<tr>'''+'''\n'''+'''<td width="33%"><font color="#000000" size="'''+str(police_petits_caracteres)+'''" family="DejaVu Sans, sans-serif"><b>'''+texte_ref_art_1+'''</b></font><br />'''+'''<div align="center">'''+vignette_img_src_1+'''</div><p />'''+'''</td>'''+'''\n'''+'''<td width="33%"><font color="#000000" size="'''+str(police_petits_caracteres)+'''" family="DejaVu Sans, sans-serif"><b>'''+texte_ref_art_2+'''</b></font><br />'''+'''<div align="center">'''+vignette_img_src_2+'''</div><p />'''+'''</td>'''+'''\n'''+'''<td width="34%"><font color="#000000" size="'''+str(police_petits_caracteres)+'''" family="DejaVu Sans, sans-serif"><b>'''+texte_ref_art_3+'''</b></font><br />'''+'''<div align="center">'''+vignette_img_src_3+'''</div><p />'''+'''</td>'''+'''\n'''+'''</tr>'''+'''\n'''+'''</table>'''+'''\n'''
# HTML: write the end of the page
html_body_html_final = '''</body></html>'''
#
self.doc = QTextDocument()
# Assemble the final HTML markup
self.doc.setHtml(html_entete + html_tab_ligne_0 + html_tab_ligne_1 + html_tab_ligne_2 + html_tab_ligne_3 + html_tab_ligne_4 + html_tab_ligne_5 + html_tab_ligne_6 + html_tab_ligne_7 + html_tab_ligne_8 + html_tab_ligne_9 + html_tab_ligne_10 + html_body_html_final)
# Write the PDF file with QPrinter
self.printer = QPrinter(QPrinter.HighResolution)
self.printer.setOutputFormat(QPrinter.PdfFormat)
self.printer.setOrientation(QPrinter.Portrait)
self.printer.setPaperSize(QPrinter.A4)
# The margin sizes are converted to millimetres (each value is multiplied by 10)
self.printer.setPageMargins(int(marge_gauche*10), int(marge_haute*10), int(marge_droite*10), int(marge_basse*10), QPrinter.Millimeter)
print("Le fichier pdf a été crée !")
self.printer.setOutputFileName("test.pdf")
# Final write of the PDF file
# NOTE(review): no page size is set on self.doc before printing. Qt's
# QTextDocument::print documentation mentions a default 2 cm margin for
# documents that are not paginated - possibly the source of the wide margins;
# confirm against the Qt documentation.
self.doc.print_(self.printer)
# Demo entry point: builds sample content and instantiates the class, which
# writes the PDF from its constructor.
if __name__ == '__main__':
app = QApplication(sys.argv)
#############################################
# Pictures (3 pictures)
img_ref_art_1 = os.path.expanduser('~')+os.sep+"college/cours/Cours_Travaux_a_faire__COLLEGE_NOUVEAUX_PROGRAMMES/Niveau_3eme_cycle_4/sequence_eleve/tom_wesselmann_still_life_20_vue_01.jpg"
img_ref_art_2 = os.path.expanduser('~')+os.sep+"college/cours/Cours_Travaux_a_faire__COLLEGE_NOUVEAUX_PROGRAMMES/Niveau_3eme_cycle_4/sequence_eleve/georges_melies_le_voyage_ds_la_lune_1902.png"
img_ref_art_3 = os.path.expanduser('~')+os.sep+"college/cours/Cours_Travaux_a_faire__COLLEGE_NOUVEAUX_PROGRAMMES/Niveau_3eme_cycle_4/sequence_eleve/louise_nevelson_sky_cathedral_bois_peint_en_noir_1958.png"
# (Settings) Margin values
marge_gauche = 0.00 # margin in cm
marge_droite = 0.00 # margin in cm
marge_haute = 0.00 # margin in cm
marge_basse = 0.00 # margin in cm
# (Settings) Font size
# ------------------------------------------
# General text (font size)
taille_police_texte_general = 8 # (values between 6 and 10, 8 by default)
# Artists' works and evaluation text (font size)
taille_police_trav_artist = 2 # (values 1 or 2, 2 by default)
liste_vocab = ['<b><u>Mot de vocabulaire 1 :</u></b> La définition du mot de vocabulaire 1 ... '+'bla bla blablablabla '*10+'<br />', '<b><u>Mot de vocabulaire 2 :</u></b> La définition du mot de vocabulaire 2.<br />', '<b><u>Mot de vocabulaire 3 :</u></b> La définition du mot de vocabulaire 3 ...'+'bla bla blablablabla '*14+'<br />', '<b><u>Mot de vocabulaire 4 :</u></b> La définition du mot de vocabulaire 4 ...'+'bla bla blablablabla '*17+'<br />', '<b><u>Mot de vocabulaire 5 :</u></b> La définition du mot de vocabulaire 5.<br />', '<b><u>Mot de vocabulaire 6 :</u></b> La définition du mot de vocabulaire 6.<br />', '<b><u>Mot de vocabulaire 7 :</u></b> La définition du mot de vocabulaire 7 ...'+'bla bla blablablabla '*5+'<br />', '<b><u>Mot de vocabulaire 8 :</u></b> La définition du mot de vocabulaire 8.']
liste_questions = ["1) Est-ce que QPrinter est intéressant ?, ... oui en fait.<br />", "2) Est-ce que c'est facile à manipuler ?, ... oui quand on a compris comment ça fonctionne.<br />", "3) Est-ce que QPrinter est pratique ?, ... oui oui !, dans mon cas pour faire quelque chose en full Python, sans devoir appeler (et embarquer) un programme tiers."]
liste_textes_art = ['''* Tom Wesselmann, "Still Life 20" (1962). Résumé du contexte de l'oeuvre ... '''+"bla bla blablablabla "*16, '''* Georges Meliès, "Le voyage dans la lune" (1902). Résumé du contexte de l'oeuvre ...'''+"bla bla blablablabla "*18, '''* Louise Nevelson, "Sky cathedral" (1958) ... bois peint en noir ...'''+"bla bla blablablabla "*10]
liste_img_src_ref_art = ['''<img src="'''+img_ref_art_1+'''"'''+''' width="150" height="146" />''', '''<img src="'''+img_ref_art_2+'''"'''+''' width="150" height="100" />''', '''<img src="'''+img_ref_art_3+'''"'''+''' width="150" height="109" />''']
#############################################
# Instantiate the class
# NOTE(review): the third and fifth arguments are wrapped in doubled double
# quotes ("" ... ""), which is not a valid Python string literal as written.
Html_vers_pdf_fiche_eleve(3, "2 à 3 séances", "" Une progression avec un titre "", 1, "" Une incitation qui n'en dit pas trop ! "", "La demande est faite pour faire en sorte que l'élève comprenne un peu plus ce qu'il y a à faire mais en même temps cette demande ne lui donne pas trop de pistes de travail afin que la situation problème (énoncée par l'incitation) soit tout de même présente."+" Blabla blablabla "+"blablabla blabla"*32, ''.join(liste_vocab), ''.join(liste_questions), liste_textes_art[0], liste_textes_art[1], liste_textes_art[2], liste_img_src_ref_art[0], liste_img_src_ref_art[1], liste_img_src_ref_art[2], marge_gauche, marge_haute, marge_droite, marge_basse, taille_police_texte_general, taille_police_trav_artist)
Output:
Maybe it's a PyQt5 bug at this level, or has the syntax for using QPrinter changed in margin management?
My version of Python: 3.4.3 and my version of PyQt5: 5.2.1.
Can you help me ?

sqlite3 crashes on "INSERT" query (python 3)

I have a Database class written in Python 3.5.2, which I built to provide general functions that other classes can use to connect to the DB.
I am using SQLite and SQLite Studio to check my work.
So far, I have successfully created create-table functions, as well as others that return the name of all tables in the database (in a list), and one that returns the names of all columns in any table of the DB.
The problem:
For some reason, the INSERT INTO method is crashing my DB (it takes a while, then it says it's locked). It creates a db-journal file in the same folder, I assume it's some kind of log.
I have substituted the execute lines for prints, to check that the query is ok. I have got the query it prints and put it into the query editor of sqlite Studio to see if they work, AND THEY DO!
The method uses a while loop to create a query for each insert I want to do, and I am quite sure there is something wrong with my method definition. It must be doing something that crashes the DB (maybe something memory related?)
Here is the code:
import sqlite3
class Database:
    """Small convenience wrapper around one SQLite database file.

    The database is stored in ``<name>.db``. Every method that changes the
    database commits its work, so other connections can see the changes and
    the file is not left locked by an open transaction.

    Table and column names are inserted into the SQL text as-is, so they must
    come from trusted code. Row values are always passed as bound parameters.
    """

    def __init__(self, name):
        """Open (or create) ``<name>.db`` and keep the connection."""
        self.name = name
        self.db_conn = sqlite3.connect(self.name + '.db')
        self.cursor = self.db_conn.cursor()
        # Keep the bound method, not the result of calling it once. Storing
        # ``self.db_conn.commit()`` left ``self.commit`` set to None, so the
        # later bare ``self.commit`` statements committed nothing.
        self.commit = self.db_conn.commit

    def get_name(self):
        """Return the name the database was opened with (no ``.db`` suffix)."""
        return self.name

    def get_tableNames(self):
        """Return the names of all tables, also stored in ``self.tables``."""
        result = self.db_conn.execute(
            "select name from sqlite_master where type = 'table'")
        self.tables = [row[0] for row in result]
        return self.tables

    def get_tableColumns(self, tableName):
        """Return the column names of ``tableName`` in declaration order.

        The table name and the result are also stored in ``self.tableName``
        and ``self.columns``.
        """
        self.tableName = tableName
        result = self.db_conn.execute(
            "PRAGMA table_info(" + self.tableName + " );")
        # Index 1 of each table_info row is the column name.
        self.columns = [row[1] for row in result]
        return self.columns

    def create_table(self, table_name, *args):
        """Create ``table_name`` with an ID column plus one column per field.

        Each item of ``args`` is a sequence: ``[name, type]`` or
        ``[name, type, size]``. Anything else is reported and skipped.
        Failures are printed rather than raised.
        """
        self.table_name = table_name
        try:
            self.db_conn.execute(
                "CREATE TABLE " + self.table_name
                + "(ID INTEGER PRIMARY KEY AUTOINCREMENT);")
        except sqlite3.OperationalError:
            print("La tabla "+self.table_name+" no se ha creado")
        # Add the requested columns to the table. A for loop always moves on
        # to the next field; the earlier while loop never advanced past a
        # malformed one.
        for field in args:
            if len(field) == 2:
                column_type = field[1].upper()
            elif len(field) == 3:
                column_type = field[1].upper() + " (" + str(field[2]) + ")"
            else:
                print('los argumentos deben ser listas donde el primer elemento será el nombre de la columna y los otros dos, el tipo y tamaño (de haberlo)')
                continue
            try:
                self.db_conn.execute(
                    "ALTER TABLE " + self.table_name + " ADD COLUMN "
                    + field[0] + " " + column_type + ";")
            except sqlite3.OperationalError:
                print("No se ha podido añadir la columna "+field[0]+" a la tabla "+self.table_name)
        self.commit()

    def insert(self, tableName, *vals):
        """Insert one row per item of ``vals`` into ``tableName``.

        Each row lists the values for every column except the first one (the
        ID column), in column order. A row that cannot be inserted is
        reported with a printed message and the remaining rows are still
        processed.
        """
        fields = self.get_tableColumns(tableName)
        # Skip the first column (ID): the database assigns it.
        column_list = ','.join(fields[1:])
        for row in vals:
            # One placeholder per value, so quoting is left to sqlite3.
            placeholders = ','.join('?' * len(row))
            query = ("INSERT INTO " + tableName + " (" + column_list
                     + ") VALUES (" + placeholders + ");")
            try:
                self.db_conn.execute(query, tuple(row))
            except (sqlite3.OperationalError, sqlite3.InterfaceError,
                    sqlite3.ProgrammingError):
                print('no se han podido añadir los valores específicos a la tabla '+tableName)
            else:
                # Commit straight away so the write lock is released.
                self.commit()
                print("se han añadido los datos a la tabla")

    def close(self):
        """Close the connection and return a confirmation message."""
        self.db_conn.close()
        closed = 'database closed'
        return closed
In order to make it work:
# Open the database file stock-data.db
db=Database('stock-data')
# Create table "test" with a name column (text, size 50) and an age column (integer)
db.create_table('test',['name','text',50],['age','integer'])
# Insert two rows; each list holds the values for the columns after ID
db.insert('test',['john',20],['will',21])
This last expression is the one that crashes.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Python scraping regex (word just next to the number) - python-3.x

Related

Python Code only running in Debugging mode

Use of dictionaries with lists in Python

cleaning multi terms from stopwords

QPrinter margins under Py3 and PyQt5

sqlite3 crashes on "INSERT" query (python 3)

Categories

Resources