AttributeError: 'str' object has no attribute 'readlines' - string

Having trouble on my code.
I am getting this AttributeError and i don't know why.
Someone provide a little insight please and thanks!!!
This is written in python 3,
i am attempting to make a chart.
import sys
data = {}
def main():
filename = sys.argv[1]
parseFile(filename)
function()
def parseFile(fn):
print("Parsing", fn)
infile = open(fn, "r")
for line in infile:
line = line[:-1]
tokens = line.split()
print(tokens)
if line[0]=="#":
line.readline() #<-- this is my problem line
rsid = (tokens[0])
genotype = (tokens[3])
data[rsid] = genotype
infile.close()
main()
# This data file generated by 23andMe at: Wed Jan 26 05:37:08 2011
#
# Below is a text version of your data. Fields are TAB-separated
# Each line corresponds to a single SNP. For each SNP, we provide its identifier
# (an rsid or an internal id), its location on the reference human genome, and the
# genotype call oriented with respect to the plus strand on the human reference
# sequence. We are using reference human assembly build 36. Note that it is possible
# that data downloaded at different times may be different due to ongoing improvements
# in our ability to call genotypes. More information about these changes can be found at:
# https://www.23andme.com/you/download/revisions/
#
# More information on reference human assembly build 36:
# http://www.ncbi.nlm.nih.gov/projects/mapview/map_search.cgi?taxid=9606&build=36
#
# rsid chromosome position genotype
rs4477212 1 72017 AA
rs3094315 1 742429 AA
rs1799883 1 742429 AA
rs3131972 1 742584 GG
rs12124819 1 766409 AA
rs11240777 1 788822 GG
rs6681049 1 789870 CC
rs4970383 1 828418 CC
rs4475691 1 836671 CC
rs7537756 1 844113 AA

the attribute error means that readline is not a string method.
in this snippet:
for line in infile:
line = line[:-1]
tokens = line.split()
I am guessing (correctly?) that line[:-1] is to strip off a line feed. If that is the case, try this instead:
for line in infile:
line = line.strip()
tokens = line.split()
strip will strip off new-line and carriage returns. and since you've just altered line to be just the text without the line-feed, you can delete your line with line.readline().
Update:
to skip lines beginning with #
for line in infile:
line = line.strip()
if line[0]=="#":
continue
tokens = line.split()
by "skip", I take it to mean that you want to ignore them.
also, for good form, you should have in your parseFile function the line:
def parseFile(fn):
global data
...

Related

Comparison script diffrent files

i am trying to write a script that compares a bunch of files based on a search word, in this case i searched for 106, then i want the code to match the words from file 1 to the words in file 2 and print a list with the ones that dont match.
For example in file A i have this line
106_LB01_GP61_HAL;LB01;10892;DIGITAL;0;0;0;0;;;Smutsigt tilluftsfilter;;
and in file B i have
"Prefix": "106_LB01_GP61",
those lines match and then i want it to ignore that tag
when the script find lines that dont match etc when a tag in file A cant fint its buddy in file B i want it to write those tags to a file,
for example:
Total unused tags:1
106_LB01_GP61
right now i am stuc at making it read to diffrent files at the same time
#!/usr/bin/env python
#Import os module
import os
# Ask the user to enter string to search
search_path = (".")
file_type = (".wpp")
search_str = input("Enter searchword: ")
resultsFile = "results.csv"
file_name = ("results.csv")
# Append a directory separator if not already present
if not (search_path.endswith("/") or search_path.endswith("\\") ):
search_path = search_path + "/"
# If path does not exist, set search path to current directory
if not os.path.exists(search_path):
search_path ="."
0
# Repeat for each file in the directory
for fname in os.listdir(path=search_path):
# Apply file type filter
if fname.endswith(file_type):
# Open file for reading
fo = open(search_path + fname)
# Read the first line from the file
line = fo.readline()
# Initialize counter for line number
line_no = 1
# Loop until EOF
wf = open(search_path + resultsFile, 'a')
while line != '' :
# Search for string in line
index = line.find(search_str)
if ( index != -1) :
print(fname, "[", line_no, ",", index, "] ", line, sep="")
wf.write(line + " ")
# Read next line
line = fo.readline()
# Increment line counter
line_no += 1
# Close the files
fo.close()
def check_if_string_in_file(file_name, string_to_search):
""" Check if any line in the file contains given string """
# Open the file in read only mode
with open(file_name, 'r') as read_obj:
# Read all lines in the file one by one
for line in read_obj:
# For each line, check if line contains the string
if string_to_search in line:
return True
return False
def check_if_string_in_file(file_name2, string_to_search):
""" Check if any line in the file contains given string """
# Open the file in read only mode
with open(file_name2, 'r') as read_obj:
# Read all lines in the file one by one
for line in read_obj:
# For each line, check if line contains the string
if string_to_search in line:
return True
return False
def search_string_in_file(file_name, string_to_search):
"""Search for the given string in file and return lines containing that string,
along with line numbers"""
line_number = 0
list_of_results = []
# Open the file in read only mode
with open(file_name, 'r') as read_obj:
# Read all lines in the file one by one
for line in read_obj:
# For each line, check if line contains the string
line_number += 1
if string_to_search in line:
# If yes, then add the line number & line as a tuple in the list
list_of_results.append((line_number, line.rstrip()))
# Return list of tuples containing line numbers and lines where string is found
return list_of_results
def search_multiple_strings_in_file(file_name, list_of_strings):
"""Get line from the file along with line numbers, which contains any string from the list"""
line_number = 0
list_of_results = []
# Open the file in read only mode
with open(file_name, 'r') as read_obj:
# Read all lines in the file one by one
for line in read_obj:
line_number += 1
# For each line, check if line contains any string from the list of strings
for string_to_search in list_of_strings:
if string_to_search in line:
# If any string is found in line, then append that line along with line number in list
list_of_results.append((string_to_search, line_number, line.rstrip()))
# Return list of tuples containing matched string, line numbers and lines where string is found
return list_of_results
def main():
print('*** Loading *** ')
matched_lines = search_string_in_file(file_name, search_str)
for elem in matched_lines:
print('Line Number = ', elem[0], ' :: Line = ', elem[1])
# search for given strings in the file 'sample.txt'
matched_lines = search_multiple_strings_in_file(file_name, [search_str])
print('*** Checking if', [search_str], 'exists in a file *** ')
print('Total Matched lines : ', len(matched_lines))
# Check if string 'is' is found in file 'sample.txt'
if check_if_string_in_file(file_name, search_str):
print('Yes, string found in file')
else:
print('String not found in file')
if __name__ == '__main__':
main()

Count frequency of words under given index in a file

I am trying to count occurrence of words under specific index in my file and print it out as a dictionary.
def count_by_fruit(file_name="file_with_fruit_data.txt"):
with open(file_name, "r") as file:
content_of_file = file.readlines()
dict_of_fruit_count = {}
for line in content_of_file:
line = line[0:-1]
line = line.split("\t")
for fruit in line:
fruit = line[1]
dict_of_fruit_count[fruit] = dict_of_fruit_count.get(fruit, 0) + 1
return dict_of_fruit_count
print(count_by_fruit())
Output: {'apple': 6, 'banana': 6, 'orange': 3}
I am getting this output, however, it doesn't count frequency of the words correctly. After searching around I didn't seem to find the proper solution. Could anyone help me to identify my mistake?
My file has the following content: (data separated with tabs, put "\t" in example as format is being altered by stackoverflow)
I am line one with \t apple \t from 2018
I am line two with \t orange \t from 2017
I am line three with \t apple \t from 2016
I am line four with \t banana \t from 2010
I am line five with \t banana \t from 1999
You are looping too many times over the same line. Notice that the results you are getting are all 3 times what you are expecting.
Also, in Python, you also do not need to read the entire file. Just iterate over the file object line by line.
Try:
def count_by_fruit(file_name="file_with_fruit_data.txt"):
with open(file_name, "r") as f_in:
dict_of_fruit_count = {}
for line in f_in:
fruit=line.split("\t")[1]
dict_of_fruit_count[fruit] = dict_of_fruit_count.get(fruit, 0) + 1
return dict_of_fruit_count
Which can be further simplified to:
def count_by_fruit(file_name="file_with_fruit_data.txt"):
with open(file_name) as f_in:
dict_of_fruit_count = {}
for fruit in (line.split('\t')[1] for line in f_in):
dict_of_fruit_count[fruit] = dict_of_fruit_count.get(fruit, 0) + 1
return dict_of_fruit_count
Or, if you can use Counter:
from collections import Counter
def count_by_fruit(file_name="file_with_fruit_data.txt"):
with open(file_name) as f_in:
return dict(Counter(line.split('\t')[1] for line in f_in))
The problem is for fruit in line:. Splitting the lines on the tabs is going to split them into three parts. If you loop over those three parts every time, adding one to the count for each, then your counts are going to be 3 times as large as the actual data.
Below is how I would write this function, using generator expressions and Counter.
from collections import Counter
def count_by_fruit(file_name="file_with_fruit_data.txt"):
with open(file_name, "r") as file:
lines = (line[:-1] for line in file)
fruit = (line.split('\t')[1] for line in lines)
return Counter(fruit)

How to fix the code about appending a number in the line?

I create a new column (name:Account) in the csv, then try to make a sequence (c = float(a) + float(b)) and for each number in sequence append to the original line in the csv, which is the value of the new column. Here is my code:
# -*- coding: utf-8 -*-
import csv
with open('./tradedate/2007date.csv') as inf:
reader = csv.reader(inf)
all = []
row = next(reader)
row.append('Amount')
all.append(row)
a =50
for i, line in enumerate(inf):
if i != 0:
size = sum(1 for _ in inf) # count the line number
for b in range(1, size+1):
c = float(a) + float(b) # create the sequence: in 1st line add 1, 2nd line add 2, 3rd line add 3...etc
line.append(c) # this is the error message: AttributeError: 'str' object has no attribute 'append'
all.append(line)
with open('main_test.csv', 'w', newline = '') as new_csv:
csv_writer = csv.writer(new_csv)
csv_writer.writerows(all)
The csv is like this:
日期,成交股數,成交金額,成交筆數,發行量加權股價指數,漲跌點數,Account
96/01/02,"5,738,692,838","141,743,085,172","1,093,711","7,920.80",97.08,51
96/01/03,"5,974,259,385","160,945,755,016","1,160,347","7,917.30",-3.50,52
96/01/04,"5,747,756,529","158,857,947,106","1,131,747","7,934.51",17.21,53
96/01/05,"5,202,769,867","143,781,214,318","1,046,480","7,835.57",-98.94,54
96/01/08,"4,314,344,739","115,425,522,734","888,324","7,736.71",-98.86,55
96/01/09,"4,533,381,664","120,582,511,893","905,970","7,790.01",53.30,56
The Error message is:
Traceback (most recent call last):
File "main.py", line 21, in <module>
line.append(c)
AttributeError: 'str' object has no attribute 'append'
Very thanks for any help!!
I'm a little confused why you're structuring your code this way, but the simplest fix would be to change the append (since you can't append to a string) to += a string version of c, i.e.
line += str(c)
or
line += ',{}'.format(c)
(I'm not clear based on how you're written this if you need the comma or not)
The biggest problem is that you're not using your csv reader - below is a better implementation. With the csv reader it's cleaner to do the append that you want to do versus using the file object directly.
import csv
with open('./tradedate/2007date.csv') as old_csv:
with open('main_test.csv', 'w') as new_csv:
writer = csv.writer(new_csv, lineterminator='\n')
reader = csv.reader(old_csv)
all = []
row = next(reader)
row.append('Line Number')
all.append(row)
line_number = 51
for row in reader:
row.append(line_number)
all.append(row)
line_number += 1
writer.writerows(all)

keep order as it was saved and keep only unique words in text file list

need sort lines in order in which they were saved in txt file, just new line comes from below and save this order after remove similar words. so if I add words in loop one by one
line A
line B
line C
line D
line E
here I got three solutions, but nothing works for me correct
first keeps only unique words;
with open('C:\my_path\doc.txt', 'r') as lines:
lines_set = {line.strip() for line in lines}
with open(''D:\path\file.txt', 'w') as out:
for line in lines_set:
out.write(line + '\n')
but destroys order:
1. line B
2. line E
3. line C
4. line D
5. line A
second keeps order but same words too:
with open('C:\my_path\doc.txt', 'r') as lines:
lines_set = []
for line in lines:
if line.strip() not in lines_set:
lines_set.append(line.strip())
last one works well, but with input text:
with open('C:\my_path\doc.txt', 'r') as lines:
lines_set = []
for line in lines:
if line.strip() not in lines_set:
lines_set.append(line.strip())
in some cases I have no any input, and also have different input, so need somehow sort ordered list itself
can you help me figure out with it please
loadLines is almost as your function you show twice, but it allows duplicates. removeDuplicates strips duplicates. saveLines writes a list to a file, deliminating by newline. All functions preserve order.
#Load lines with duplicates
def loadLines(f):
with open(f, 'r') as lines:
lines_set = []
for line in lines:
lines_set.append(line.strip())
return lines_set
#Search list "l", return list without duplicates.
def removeDuplicates(l):
out = list(set(l))
for i in enumerate(out):
out[i[0]] = l.index(i[1])
out.sort()
for i in enumerate(out):
out[i[0]] = l[i[1]]
return out
#Write the lines "l" to filepath "f"
def saveLines(f, l):
open(f, 'w').write('\n'.join(l))
lines = loadLines('doc.txt')
print(lines)
stripped_lines = removeDuplicates(lines)
print(stripped_lines)
saveLines('doc.txt', stripped_lines)

How can I simplify and format this function?

So I have this messy code where I wanted to get every word from frankenstein.txt, sort them alphabetically, eliminated one and two letter words, and write them into a new file.
def Dictionary():
d = []
count = 0
bad_char = '~!##$%^&*()_+{}|:"<>?\`1234567890-=[]\;\',./ '
replace = ' '*len(bad_char)
table = str.maketrans(bad_char, replace)
infile = open('frankenstein.txt', 'r')
for line in infile:
line = line.translate(table)
for word in line.split():
if len(word) > 2:
d.append(word)
count += 1
infile.close()
file = open('dictionary.txt', 'w')
file.write(str(set(d)))
file.close()
Dictionary()
How can I simplify it and make it more readable and also how can I make the words write vertically in the new file (it writes in a horizontal list):
abbey
abhorred
about
etc....
A few improvements below:
from string import digits, punctuation
def create_dictionary():
words = set()
bad_char = digits + punctuation + '...' # may need more characters
replace = ' ' * len(bad_char)
table = str.maketrans(bad_char, replace)
with open('frankenstein.txt') as infile:
for line in infile:
line = line.strip().translate(table)
for word in line.split():
if len(word) > 2:
words.add(word)
with open('dictionary.txt', 'w') as outfile:
outfile.writelines(sorted(words)) # note 'lines'
A few notes:
follow the style guide
string contains constants you can use to provide the "bad characters";
you never used count (which was just len(d) anyway);
use the with context manager for file handling; and
using a set from the start prevents duplicates, but they aren't ordered (hence sorted).
Using re module.
import re
words = set()
with open('frankenstein.txt') as infile:
for line in infile:
words.extend([x for x in re.split(r'[^A-Za-z]*', line) if len(x) > 2])
with open('dictionary.txt', 'w') as outfile:
outfile.writelines(sorted(words))
From r'[^A-Za-z]*' in re.split, replace 'A-Za-z' with the characters which you want to include in dictionary.txt.

Resources