Extending current code to include both median and mode - python-3.x

I have this line of code that i used for one assignment, but i can't figure out how to add the median and mode into the code to let it run without error.
def main():
filename = input('File name: ')
num=0
try:
infile = open(filename, 'r')
count = 0
total = 0.0
average = 0.0
maximum = 0
minimum = 0
range1 = 0
for line in infile:
num = int(line)
count = count + 1
total = total + num
if count == 1:
maximum = num
minimum = num
else:
if num > maximum:
maximum = num
if num < minimum:
minimum = num
if count > 0:
average = total / count
range1 = maximum - minimum

I'll jump right in and show you the code. It's a very simple and quite pythonic solution.
Solution
import statistics
def open_file(filename):
try:
return open(filename, 'r')
except OSError as e:
print(e)
return None
def main():
# Read file. Note that we are trusting the user input here without sanitizing.
fd = open_file(input('File name: '))
if fd is None: # Ensure we have a file descriptor
return
data = fd.read() # Read whole file
if data == '':
print("No data in file")
return
lines = data.split('\n') # Split the data into a list of strings
# We need to convert the list of strings to a list of integers
# I don't know a pythonic way of doing this.
for number, item in enumerate(lines):
lines[number] = int(item)
total_lines = len(lines)
total_sum = sum(lines)
maximum = max(lines)
minimum = min(lines)
# Here is the python magic, no need to reinvent the wheel!
mean = statistics.mean(lines) # mean == average
median = statistics.median(lines)
mode = "No mode!"
try:
mode = statistics.mode(lines)
except statistics.StatisticsError as ec:
pass # No mode, due to having the same quantity of 2 or more different values
print("Total lines: " + str(total_lines))
print("Sum: " + str(total_sum))
print("Max: " + str(maximum))
print("Min: " + str(minimum))
print("Mean: " + str(mean))
print("Median: " + str(median))
print("Mode: " + str(mode))
if __name__ == '__main__':
main()
Explanation
Generally, in python, it's safe to assume that if you want to calculate any mundane value using a well known algorithm, there will already be a function written for you to do just that. No need to reinvent the wheel!
These functions aren't usually hard to find online either. For instance, you can find suggestions regarding the statistics library by googling python calculate the median
Although you have the solution, I strongly advise looking through the source code of the statistics library (posted below), and working out how these functions work for yourself. It will help your grow as a developer and mathematician.
statistics.py
mean
def mean(data):
"""Return the sample arithmetic mean of data.
>>> mean([1, 2, 3, 4, 4])
2.8
>>> from fractions import Fraction as F
>>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
Fraction(13, 21)
>>> from decimal import Decimal as D
>>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
Decimal('0.5625')
If ``data`` is empty, StatisticsError will be raised.
"""
if iter(data) is data:
data = list(data)
n = len(data)
if n < 1:
raise StatisticsError('mean requires at least one data point')
T, total, count = _sum(data)
assert count == n
return _convert(total/n, T)
median
def median(data):
"""Return the median (middle value) of numeric data.
When the number of data points is odd, return the middle data point.
When the number of data points is even, the median is interpolated by
taking the average of the two middle values:
>>> median([1, 3, 5])
3
>>> median([1, 3, 5, 7])
4.0
"""
data = sorted(data)
n = len(data)
if n == 0:
raise StatisticsError("no median for empty data")
if n%2 == 1:
return data[n//2]
else:
i = n//2
return (data[i - 1] + data[i])/2
mode
def mode(data):
"""Return the most common data point from discrete or nominal data.
``mode`` assumes discrete data, and returns a single value. This is the
standard treatment of the mode as commonly taught in schools:
>>> mode([1, 1, 2, 3, 3, 3, 3, 4])
3
This also works with nominal (non-numeric) data:
>>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
'red'
If there is not exactly one most common value, ``mode`` will raise
StatisticsError.
"""
# Generate a table of sorted (value, frequency) pairs.
table = _counts(data)
if len(table) == 1:
return table[0][0]
elif table:
raise StatisticsError(
'no unique mode; found %d equally common values' % len(table)
)
else:
raise StatisticsError('no mode for empty data')

Related

Running out of index in binary search

###Binary Search###
def search(list1,n):
l=0
u=len(list1)
print(u)
while l<=u:
mid = (l+u)//2
if list1[mid]==n:
global index1
index = mid
return True
else:
if list1[mid]<n:
l = list1[mid]
else:
u = list1[mid]
list1 = [4,7,8,12,45,99.102,702,10987,56666]
n = 12
list1.sort()
if search(list1, n):
print("Found at ",index)
else:
print("Not Found")
The error I am getting is:
line 26, in <module> if search(list1, n): line 11, in search if list1[mid]==n: IndexError: list index out of range
I am assuming your the u in your code is supposed to be the high pointer. In that case it should be initialized to len(list1) - 1 and in the end the left and right pointers should be set to l = list1[mid+1] and u = list1[mid-1]. For binary search its always easier to implement the recursive method. Find out more info here

Index going out of range in bisect_left in Python 3

I'm writing this piece of code, in which I've used bisect_left function from the bisect module which is a first-party module of Python. I'm using it with two parameters only i.e. sorted_list and target(the one for which I have to find the suitable index value).
The issue is: If my target is greater than the sum of lowest value and highest value, the function is returning the index = len(sorted_li), due to which I'm getting index error. I can use try and except but more than that I'm curious to know why it is behaving like so.
Following is my code:
from bisect import bisect_left
li = [10,15,3,6,10]
k = 19
def binary_search(sorted_list,target):
index = bisect_left(sorted_list,target)
print(index)
if sorted_list[index] == target:
return index
else:
return False
def function(sorted_li,k):
"""
Given a list of numbers and a number k, return whether any two numbers from the list add up to k.
For example, given [10, 15, 3, 7] and k of 17, return true since 10 + 7 is 17.
"""
print(sorted_li)
for i in range(len(sorted_li)):
print('Next iteration')
print(sorted_li[i])
target = k - sorted_li[i]
j = binary_search(sorted_li,target)
if j:
if j != i:
print(sorted_li[i])
print(sorted_li[j])
return True
else:
if j + 1 < len(sorted_li):
if sorted_li[j+1] == target:
print(sorted_li[i])
print(sorted_li[j+1])
return True
if j - 1 > 0:
if sorted_li[j-1] == target:
print(sorted_li[i])
print(sorted_li[j-1])
return True
return False
if __name__ == "__main__":
li.sort()
a = function(li,k)
print(a)
It's output is as follows:
but when I'm changing k to 18, the code is working fine, the output is as follows:
I've tried with various sets of numbers for the same. The output remains the same.
You're using bisect_left which has next purpose: it looking for the insertion point for x (which is target in your case) in a to maintain sorted order.
So for your case when you call first binary_search first time for 16 (19 - 3), it compare your number with items in li list using binary algorithm and then it returns position for insert 5, because in your list [3, 6, 10, 10, 15] insertion point should be after 15 which is correct.
If you open documentation you can find next method in searching sorted list
which does exactly you need, it searching for the exact item in list and return position of it if it exists either it raises ValueError because item not found.
def index(a, x):
'Locate the leftmost value exactly equal to x'
i = bisect_left(a, x)
if i != len(a) and a[i] == x:
return i
raise ValueError

How does a value from a previous level in recursion go back up?

I'm trying to make a recursive function to get minimum number of coins for change, but I think my understanding of what each layer's return value in the stack is wrong. What I want is for the coin amount to be passed back up when the recursion reaches it's base case, but looking at the debugger, the coin case decreases on the way back up.
I've already tried to look at solutions for this problem, but they all seem to use dynamic programming, and I know that it's more efficient in terms of complexity, but I want to figure out how to do the recursion before adding the dynamic programming portion
def min_coin(coin_list, value, counter = 0):
if value == 0:
return 0
else:
for coin in coin_list:
if coin <= value:
sub_result = value - coin
min_coin(coin_list, sub_result, counter)
counter +=1
return counter
#counter += 1 #should add returning out from,
#return counter
coin_list = [5, 2, 1]
value = 8
print(min_coin(coin_list,value))
I want an output of 3, but the actual output is 1 no matter the value
You need to increment the counter before calling min_coin().
def min_coin(coin_list, value, counter = 0):
if value == 0:
return counter
else:
for coin in coin_list:
if coin <= value:
sub_result = value - coin
return min_coin(coin_list, sub_result, counter+1)
You can solve your task without recursion, answer from geekforcoders
# Python 3 program to find minimum
# number of denominations
def findMin(V):
# All denominations of Indian Currency
deno = [1, 2, 5, 10, 20, 50,
100, 500, 1000]
n = len(deno)
# Initialize Result
ans = []
# Traverse through all denomination
i = n - 1
while(i >= 0):
# Find denominations
while (V >= deno[i]):
V -= deno[i]
ans.append(deno[i])
i -= 1
# Print result
for i in range(len(ans)):
print(ans[i], end = " ")
# Driver Code
if __name__ == '__main__':
n = 93
print("Following is minimal number",
"of change for", n, ": ", end = "")
findMin(n)

Select a number randomly with probability proportional to its magnitude from the given array of n elements

Ex 1: A = [0 5 27 6 13 28 100 45 10 79]
let f(x) denote the number of times x getting selected in 100 experiments.
f(100) > f(79) > f(45) > f(28) > f(27) > f(13) > f(10) > f(6) > f(5) > f(0)
My code:
def pick_a_number_from_list(A,l):
Sum = 0
#l = len(A)
for i in range(l):
Sum+=A[i]
A_dash = []
for i in range(l):
b=A[i]/Sum
A_dash.append(b)
#print(A_dash)
series = pd.Series(A_dash)
cumsum = series.cumsum(skipna=False)
#print(cumsum[9])
sample_value = uniform(0.0,1.0)
r = sample_value
print(r)
#for i in range(l):
if r<cumsum[1]:
return 1
elif r>cumsum[1] and r <cumsum[2]:
return 2
elif r<cumsum[3]:
return 3
elif r<cumsum[4]:
return 4
elif r<cumsum[5]:
return 5
elif r<cumsum[6]:
return 6
elif r<cumsum[7]:
return 7
elif r<cumsum[8]:
return 8
elif r<cumsum[9]:
return 9
def sampling_based_on_magnitued():
A = [0,5,27,6,13,28,100,45,10,79]
n = len(A)
#for i in range(1,10):
num = pick_a_number_from_list(A,n)
print(A[num])
sampling_based_on_magnitued()
In mu code i am using multiple if else statement and because it is hardcoded
i can make by o/p right till 10 element in the list.
I want to make my code dynamic for any value in the list.
Here in my code i have restricted it to n=10
Pls tell me how can i right generic code which can replace all if - elseif statement with for loop
sum1=0;
for i in A:
sum1+=i;
x=0
list1=[]
for i in A:
list1.append(x+i/sum1)
x=x+i/sum1;
#list1 contsins cumulative sum
bit=uniform(0,1)
for i in range (0,len(list1)):
if bit<list1[i]:
return A[i]
you may use this
you can use random.choices
A = [0,5, 27, 6, 13, 28, 100, 45, 10, 79]
let no of random values want to pick it be 100 s0 k=100
w = [0.0, 0.01597444089456869, 0.08626198083067092, 0.019169329073482427, 0.04153354632587859, 0.08945686900958466, 0.3194888178913738, 0.14376996805111822, 0.03194888178913738, 0.2523961661341853]
weights is calculsted by using A[i]/(total sum of all the values of A)
x = random.choices(A,w,k=100)
print(x)
it displays the values from list A according to there weights
Some changes in Bitan Guha Roy's code to return just one value
import numpy as np
sum1=0;
for i in A:
sum1+=i;
x=0
list1=[]
for i in A:
list1.append(x+i/sum1)
x=x+i/sum1;
# list1 contains cumulative sum
bit=np.random.uniform(0.0,1.0,1)
for i in range (0,len(list1)):
if bit>=list1[i] and bit<list1[i+1]:
print(A[i+1]) # or return if under a function
import random
lst=[0, 5 ,27, 6, 13, 28, 100, 45, 10,79]
def pick_a_number_from_list(A):
weights1=[]
for i in A:
weights1.append(i/sum(lst))
selected_random_number = random.choices(A,weights=weights1,k=1)
return selected_random_number
def sampling_based_on_magnitued():
for i in range(1,100):
number=pick_a_number_from_list(lst)
print(number)
sampling_based_on_magnitued()
# used random.choices which gives option to choose random number according respective weights. Please suggest any modification if you've any

iteration over a sequence with an implicit type in Python 3.6

I am trying to iterate over a sequence of numbers. I have this:
from itertools import islice, count
handle = int(input("Please enter a number:")
handler = str(handle)
parameter = []
for i in handler:
parameter.append(i)
print(parameter) #This was for debugging
revised = parameter(count(1[2])) #I'm not sure I'm using the correct syntax here, the purpose is to make revised == parameter[0] and parameter[2]
Ultimately, what I am trying to achieve is to take a sequence of numbers or two, and compare them. For instance, if i[0] == i[1] + i [2] I want to return True, or for that matter if i[0] == i[1] - i[2]. I want the program to iterate over the entire sequence, checking for these types of associations, for instance, 23156 would == true because 2*3 = 6, 2+3 = 5, 5+1 = 6, 2+3+1=6; etc. It's strictly for my own purposes, just trying to make a toy.
When I utilize
revised = parameter(count(1[2])
I am getting an error that says builtins. TYPEERROR, type int is not subscriptable but I explicitly turned the integer input into a string.
Albeit unclear, what you have attempted to describe is hard to explain. It appears to be akin to a Running Total but with restrictions and of various operations, i.e. addition, subtraction and products.
Restrictions
The first two numbers are seeds
The following numbers must accumulate by some operation
The accumulations must progress contiguously
Code
import operator as op
import itertools as it
def accumulate(vals):
"""Return a set of results from prior, observed operations."""
adds = set(it.accumulate(vals)) # i[0] == i[1] + i[2]
muls = set(it.accumulate(vals, op.mul)) # i[0] == i[1] * i[2]
subs = {-x for x in it.accumulate(vals, func=op.sub)} # i[0] == i[1] - i[2]
#print(adds, muls, subs)
return adds | muls | subs
def rolling_acc(vals):
"""Return accumulations by sweeping all contiguous, windowed values."""
seen = set()
for i, _ in enumerate(vals):
window = vals[i:]
if len(window) >= 3:
seen |= accumulate(window)
return seen
def is_operable(vals):
"""Return `True` if rolling operations on contiguous elements will be seen."""
s = str(vals)
nums = [int(x) for x in s]
ahead = nums[2:]
accums = rolling_acc(nums)
#print(ahead, accums)
return len(set(ahead) & accums) == len(ahead)
Tests
assert is_operable(23156) == True
assert is_operable(21365) == False # {2,3} non-contiguous
assert is_operable(2136) == True
assert is_operable(11125) == True

Resources