Printing list in different columns - python-3.x

I am quite new to Python and I am now struggling with printing my list in columns. It prints my lists in one columns only but I want it printed under 4 different titles. I know am missing something but can't seem to figure it out. Any advice would be really appreciated!
def createMyList():
myAgegroup = ['20 - 39','40 - 59','60 - 79']
mygroupTitle = ['Age','Underweight','Healthy','Overweight',]
myStatistics = [['Less than 21%','21 - 33','Greater than 33%',],['Less than 23%','23 - 35','Greater than 35%',],['Less than 25%','25 - 38','Greater than 38%',]]
printmyLists(myAgegroup,mygroupTitle,myStatistics)
return
def printmyLists(myAgegroup,mygroupTitle,myStatistics):
print(': Age : Underweight : Healthy : Overweight :')
for count in range(0, len(myAgegroup)):
print(myAgegroup[count])
for count in range(0, len(mygroupTitle)):
print(mygroupTitle[count])
for count in range(0, len(myStatistics)):
print(myStatistics[0][count])
return
createMyList()

To print data in nice columns is nice to know Format Specification Mini-Languag (doc). Also, to group data together, look at zip() builtin function (doc).
Example:
def createMyList():
myAgegroup = ['20 - 39','40 - 59','60 - 79']
mygroupTitle = ['Age', 'Underweight','Healthy','Overweight',]
myStatistics = [['Less than 21%','21 - 33','Greater than 33%',],['Less than 23%','23 - 35','Greater than 35%',],['Less than 25%','25 - 38','Greater than 38%',]]
printmyLists(myAgegroup,mygroupTitle,myStatistics)
def printmyLists(myAgegroup,mygroupTitle,myStatistics):
# print the header:
for title in mygroupTitle:
print('{:^20}'.format(title), end='')
print()
# print the columns:
for age, stats in zip(myAgegroup, myStatistics):
print('{:^20}'.format(age), end='')
for stat in stats:
print('{:^20}'.format(stat), end='')
print()
createMyList()
Prints:
Age Underweight Healthy Overweight
20 - 39 Less than 21% 21 - 33 Greater than 33%
40 - 59 Less than 23% 23 - 35 Greater than 35%
60 - 79 Less than 25% 25 - 38 Greater than 38%

Related

Add suffix to a specific row in pandas dataframe

Im trying to add suffix to % Paid row in the dataframe, but im stuck with only adding suffix to the column names.
is there a way i can add suffix to a specific row values,
Any suggestions are highly appreciated.
d={
("Payments","Jan","NOS"):[],
("Payments","Feb","NOS"):[],
("Payments","Mar","NOS"):[],
}
d = pd.DataFrame(d)
d.loc["Total",("Payments","Jan","NOS")] = 9991
d.loc["Total",("Payments","Feb","NOS")] = 3638
d.loc["Total",("Payments","Mar","NOS")] = 5433
d.loc["Paid",("Payments","Jan","NOS")] = 139
d.loc["Paid",("Payments","Feb","NOS")] = 123
d.loc["Paid",("Payments","Mar","NOS")] = 20
d.loc["% Paid",("Payments","Jan","NOS")] = round((d.loc["Paid",("Payments","Jan","NOS")] / d.loc["Total",("Payments","Jan","NOS")])*100)
d.loc["% Paid",("Payments","Feb","NOS")] = round((d.loc["Paid",("Payments","Feb","NOS")] / d.loc["Total",("Payments","Feb","NOS")])*100)
d.loc["% Paid",("Payments","Mar","NOS")] = round((d.loc["Paid",("Payments","Mar","NOS")] / d.loc["Total",("Payments","Mar","NOS")])*100)
without suffix
I tried this way, it works but.. im looking for adding suffix for an entire row..
d.loc["% Paid",("Payments","Jan","NOS")] = str(round((d.loc["Paid",("Payments","Jan","NOS")] / d.loc["Total",("Payments","Jan","NOS")])*100)) + '%'
d.loc["% Paid",("Payments","Feb","NOS")] = str(round((d.loc["Paid",("Payments","Feb","NOS")] / d.loc["Total",("Payments","Feb","NOS")])*100)) + '%
d.loc["% Paid",("Payments","Mar","NOS")] = str(round((d.loc["Paid",("Payments","Mar","NOS")] / d.loc["Total",("Payments","Mar","NOS")])*100)) + '%'
with suffix
Select row separately by first index value, round and convert to integers, last to strings and add %:
d.loc["% Paid"] = d.loc["% Paid"].round().astype(int).astype(str).add(' %')
print (d)
Payments
Jan Feb Mar
NOS NOS NOS
Total 9991.0 3638.0 5433.0
Paid 139.0 123.0 20.0
% Paid 1 % 3 % 0 %

How to make my code work more efficiently

I made this grade calculator that calculates the grade form my school website. There are some minor flaws that do not affect the code, but does bug me. For example, When I paste the grade from the website, I have to press enter twice, which people who are using my program for the first time get confused with. Also, I want to make it so that the code does not create errors when I press enter without any data given in input.
Not only, these, but I want some feedback on my code and how I can improve them.
This is the code I worked on so far.
class color:
reset = '\\033\[0m'
class fg:
green = '\\033\[32m'
orange = '\\033\[33m'
\#getting input from user
def multi_input():
try:
while True:
data=input(color.fg.orange)
if not data: break
yield data
except KeyboardInterrupt:
return
restart=1
while restart!="x":
score = \[\]
percent = \[\]
add = \[\]
print(color.fg.green + "Enter Grade" + color.reset)
data = 0
data = list(multi_input())
#filter data into percent and score
for i in range(3, len(data),4):
data[i] = data[i].split('\t')
try:
percent.append(data[i][3])
score.append(data[i][4])
except IndexError:
result = 0
#take out ungraded values
percent = [value for value in percent if value != '']
score = [value for value in score if value != '']
#refine percent data
for i in range(len(percent)):
try:
percent[i] = percent[i].replace('%', '')
percent[i] = float(percent[i])
except ZeroDivisionError:
result = 0
#refine score data
for i in range(len(score)):
score[i] = score[i].split('/')
for j in range(len(score[i])):
score[i][j] = float(score[i][j])
try:
score[i] = score[i][0]/score[i][1]*100
except ZeroDivisionError:
result = 0
#amount of assignments
print()
print(color.fg.green + "graded assignments: ", len(score))
#calculation
for i in range(len(score)):
add.append(score[i]*percent[i]/100)
print(color.fg.green + "Percentage: ", f"{sum(add)/sum(percent)*100:05.2f}" + color.reset)
restart = input(color.fg.green + "press any key to start again, or x to exit.")
print()
This is a sample grade copied from my school website so that you can test my code out.
Nov
02
Projects & Labs
4.2 If statements & 4.3 Comparison A 1% 100/100 11/2/22
Nov
02
Quiz
4.2 If statements & 4.3 Comparison Quizzes A 0.4% 100/100 11/2/22
Oct
31
Projects & Labs
4.1 Booleans Labs A 1% 100/100 10/31/22
Oct
31
Quiz
4.1 Boolean Quiz A 0.4% 100/100 10/31/22
Oct
24
Exams & Tests
Python Console & Interaction Module Test A 12.5% 200/200 Test 18/20 Programming: 100(Extra 10 points Extra credit) 10/24/22
Oct
24
Homework
Study for Python Console & Interaction Quiz & Programming Test: Ungraded (Ungraded)
Oct
21
Projects & Labs
3.6 Comments Quiz & Lab C 1% 75/100 Quiz = 1/2 10/26/22
Oct
21
Projects & Labs
3.5 String Operators Labs A 2% 200/200 no screenshot of recipe 10/24/22

Parsing heterogenous data from a text file in Python

I am trying to parse raw data results from a text file into an organised tuple but having trouble getting it right.
My raw data from the textfile looks something like this:
Episode Cumulative Results
EpisodeXD0281119
Date collected21/10/2019
Time collected10:00
Real time PCR for M. tuberculosis (Xpert MTB/Rif Ultra):
PCR result Mycobacterium tuberculosis complex NOT detected
Bacterial Culture:
Bottle: Type FAN Aerobic Plus
Result No growth after 5 days
EpisodeST32423457
Date collected23/02/2019
Time collected09:00
Gram Stain:
Neutrophils Occasional
Gram positive bacilli Moderate (2+)
Gram negative bacilli Numerous (3+)
Gram negative cocci Moderate (2+)
EpisodeST23423457
Date collected23/02/2019
Time collected09:00
Bacterial Culture:
A heavy growth of
1) Klebsiella pneumoniae subsp pneumoniae (KLEPP)
ensure that this organism does not spread in the ward/unit.
A heavy growth of
2) Enterococcus species (ENCSP)
Antibiotic/Culture KLEPP ENCSP
Trimethoprim-sulfam R
Ampicillin / Amoxic R S
Amoxicillin-clavula R
Ciprofloxacin R
Cefuroxime (Parente R
Cefuroxime (Oral) R
Cefotaxime / Ceftri R
Ceftazidime R
Cefepime R
Gentamicin S
Piperacillin/tazoba R
Ertapenem R
Imipenem S
Meropenem R
S - Sensitive ; I - Intermediate ; R - Resistant ; SDD - Sensitive Dose Dependant
Comment for organism KLEPP:
** Please note: this is a carbapenem-RESISTANT organism. Although some
carbapenems may appear susceptible in vitro, these agents should NOT be used as
MONOTHERAPY in the treatment of this patient. **
Please isolate this patient and practice strict contact precautions. Please
inform Infection Prevention and Control as contact screening might be
indicated.
For further advice on the treatment of this isolate, please contact.
The currently available laboratory methods for performing colistin
susceptibility results are unreliable and may not predict clinical outcome.
Based on published data and clinical experience, colistin is a suitable
therapeutic alternative for carbapenem resistant Acinetobacter spp, as well as
carbapenem resistant Enterobacteriaceae. If colistin is clinically indicated,
please carefully assess clinical response.
EpisodeST234234057
Date collected23/02/2019
Time collected09:00
Authorised by xxxx on 27/02/2019 at 10:35
MIC by E-test:
Organism Klebsiella pneumoniae (KLEPN)
Antibiotic Meropenem
MIC corrected 4 ug/mL
MIC interpretation Resistant
Antibiotic Imipenem
MIC corrected 1 ug/mL
MIC interpretation Sensitive
Antibiotic Ertapenem
MIC corrected 2 ug/mL
MIC interpretation Resistant
EpisodeST23423493
Date collected18/02/2019
Time collected03:15
Potassium 4.4 mmol/L 3.5 - 5.1
EpisodeST45445293
Date collected18/02/2019
Time collected03:15
Creatinine 32 L umol/L 49 - 90
eGFR (MDRD formula) >60 mL/min/1.73 m2
Creatinine 28 L umol/L 49 - 90
eGFR (MDRD formula) >60 mL/min/1.73 m2
Essentially the pattern is that ALL information starts with a unique EPISODE NUMBER and follows with a DATE and TIME and then the result of whatever test. This is the pattern throughout.
What I am trying to parse into my tuple is the date, time, name of the test and the result - whatever it might be. I have the following code:
with open(filename) as f:
data = f.read()
data = data.splitlines()
DS = namedtuple('DS', 'date time name value')
parsed = list()
idx_date = [i for i, r in enumerate(data) if r.strip().startswith('Date')]
for start, stop in zip(idx_date[:-1], idx_date[1:]):
chunk = data[start:stop]
date = time = name = value = None
for row in chunk:
if not row: continue
row = row.strip()
if row.startswith('Episode'): continue
if row.startswith('Date'):
_, date = row.split()
date = date.replace('collected', '')
elif row.startswith('Time'):
_, time = row.split()
time = time.replace('collected', '')
else:
name, value, *_ = row.split()
print (name)
parsed.append(DS(date, time, name, value))
print(parsed)
My error is that I am unable to find a way to parse the heterogeneity of the test RESULT in a way that I can use later, for example for the tuple DS ('DS', 'date time name value'):
DATE = 21/10/2019
TIME = 10:00
NAME = Real time PCR for M tuberculosis or Potassium
RESULT = Negative or 4.7
Any advice appreciated. I have hit a brick wall.

How to use custom mean, median, mode functions with array of 2500 in python?

So I am trying to solve mean, median and mode challenge on Hackerrank. I defined 3 functions to calculate mean, median and mode for a given array with length between 10 and 2500, inclusive.
I get an error with an array of 2500 integers, not sure why. I looked into python documentation and found no mentions of max length for lists. I know I can use statistics module but trying the hard way and being stubborn I guess. Any help and criticism is appreciated regarding my code. Please be honest and brutal if need be. Thanks
N = int(input())
var_list = [int(x) for x in input().split()]
def mean(sample_list):
mean = sum(sample_list)/N
print(mean)
return
def median(sample_list):
sorted_list = sorted(sample_list)
if N%2 != 0:
median = sorted_list[(N//2)]
else:
median = (sorted_list[N//2] + sorted_list[(N//2)-1])/2
print(median)
return
def mode(sample_list):
sorted_list = sorted(sample_list)
mode = min(sorted_list)
max_count = sorted_list.count(mode)
for i in sorted_list:
if (i <= mode) and (sorted_list.count(i) >= max_count):
mode = i
print(mode)
return
mean(var_list)
median(var_list)
mode(var_list)
Compiler Message
Wrong Answer
Input (stdin)
2500
19325 74348 68955 98497 26622 32516 97390 64601 64410 10205 5173 25044 23966 60492 71098 13852 27371 40577 74997 42548 95799 26783 51505 25284 49987 99134 33865 25198 24497 19837 53534 44961 93979 76075 57999 93564 71865 90141 5736 54600 58914 72031 78758 30015 21729 57992 35083 33079 6932 96145 73623 55226 18447 15526 41033 46267 52486 64081 3705 51675 97470 64777 31060 90341 55108 77695 16588 64492 21642 56200 48312 5279 15252 20428 57224 38086 19494 57178 49084 37239 32317 68884 98127 79085 77820 2664 37698 84039 63449 63987 20771 3946 862 1311 77463 19216 57974 73012 78016 9412 90919 40744 24322 68755 59072 57407 4026 15452 82125 91125 99024 49150 90465 62477 30556 39943 44421 68568 31056 66870 63203 43521 78523 58464 38319 30682 77207 86684 44876 81896 58623 24624 14808 73395 92533 4398 8767 72743 1999 6507 49353 81676 71188 78019 88429 68320 59395 95307 95770 32034 57015 26439 2878 40394 33748 41552 64939 49762 71841 40393 38293 48853 81628 52111 49934 74061 98537 83075 83920 42792 96943 3357 83393{-truncated-}
Download to view the full testcase
Expected Output
49921.5
49253.5
2184
Your issue seems to be that you are actually using standard list operations rather than calculating things on the fly, while looping through the data once (for the average). sum(sample_list) will almost surely give you something which exceeds the double-limit, i.a.w. it becomes really big.
Further reading
Calculating the mean, variance, skewness, and kurtosis on the fly
How do I determine the standard deviation (stddev) of a set of values?
Rolling variance algorithm
What is a good solution for calculating an average where the sum of all values exceeds a double's limits?
How do I determine the standard deviation (stddev) of a set of values?
How to efficiently compute average on the fly (moving average)?
I figured out that you forgot to change the max_count variable inside the if block. Probably that causes the wrong result. I tested the debugged version on my computer and they seem to work well when I compare their result with the scipy's built-in functions. The correct mode function should be
def mode(sample_list):
N = len(sample_list)
sorted_list = sorted(sample_list)
mode = min(sorted_list)
max_count = sorted_list.count(mode)
for i in sorted_list:
if (sorted_list.count(i) >= max_count):
mode = i
max_count = sorted_list.count(i)
print(mode)
I was busy with some stuff and now came back to completing this. I am happy to say that I have matured enough as a coder and solved this issue.
Here is the solution:
# Enter your code here. Read input from STDIN. Print output to STDOUT
# Input an array of numbers, convert it to integer array
n = int(input())
my_array = list(map(int, input().split()))
my_array.sort()
# Find mean
array_mean = sum(my_array) / n
print(array_mean)
# Find median
if (n%2) != 0:
array_median = my_array[n//2]
else:
array_median = (my_array[n//2 - 1] + my_array[n//2]) / 2
print(array_median)
# Find mode(I could do this using multimode method of statistics module for python 3.8)
def sort_second(array):
return array[1]
modes = [[i, my_array.count(i)] for i in my_array]
modes.sort(key = sort_second, reverse=True)
array_mode = modes[0][0]
print(array_mode)

python3: Counting repeated occurrence in a list

Each line contains a special timestamp, the caller number, the receiver number, the duration of the call in seconds and the rate per minute in cents at which this call was charged, all separated by ";”. The file contains thousands of calls looks like this. I created a list instead of a dictionary to access the elements but I'm not sure how to count the number of calls originating from the phone in question
timestamp;caller;receiver;duration;rate per minute
1419121426;7808907654;7807890123;184;0.34
1419122593;7803214567;7801236789;46;0.37
1419122890;7808907654;7809876543;225;0.31
1419122967;7801234567;7808907654;419;0.34
1419123462;7804922860;7809876543;782;0.29
1419123914;7804321098;7801234567;919;0.34
1419125766;7807890123;7808907654;176;0.41
1419127316;7809876543;7804321098;471;0.31
Phone number || # |Duration | Due |
+--------------+-----------------------
|(780) 123 4567||384|55h07m53s|$ 876.97|
|(780) 123 6789||132|17h53m19s|$ 288.81|
|(780) 321 4567||363|49h52m12s|$ 827.48|
|(780) 432 1098||112|16h05m09s|$ 259.66|
|(780) 492 2860||502|69h27m48s|$1160.52|
|(780) 789 0123||259|35h56m10s|$ 596.94|
|(780) 876 5432||129|17h22m32s|$ 288.56|
|(780) 890 7654||245|33h48m46s|$ 539.41|
|(780) 987 6543||374|52h50m11s|$ 883.72|
list =[i.strip().split(";") for i in open("calls.txt", "r")]
print(list)
I have very simple solution for your issue:
First of all use with when opening file - it's a handy shortcut and it provides sames functionality as wrap this funtion into try...except. Consider this:
lines = []
with open("test.txt", "r") as f:
for line in f.readlines():
lines.append(line.strip().split(";"))
print(lines)
counters = {}
# you browse through lists and later through numbers inside lists
for line in lines:
for number in line:
# very basic way to count occurences
if number not in counters:
counters[number] = 1
else:
counters[number] += 1
# in this condition you can tell what number of digits you accept
counters = {elem: counters[elem] for elem in counters.keys() if len(elem) > 5}
print(counters)
This should get you started
import csv
import collections
Call = collections.namedtuple("Call", "duration rate time")
calls = {}
with open('path/to/file') as infile:
for time, nofrom, noto, dur, rate in csv.reader(infile):
calls.get(nofrom, {}).get(noto,[]).append(Call(dur, rate, time))
for nofrom, logs in calls.items():
for noto, callist in logs.items():
print(nofrom, "called", noto, len(callist), "times")

Resources