Create a list from a dictionary - python-3.x

I have a dictionary
{'about': {'advertise.html': True, 'staff.html': True, 'vacancy.html': True},
'articles': {'2017': {'12': {'19': {'900588.html': True}}}},
'columns': {'2016': {'8': {'5': {'825413.html': True}}},
'2017': {'9': {'8': {'886260.html': True}}}},
'culture': {'2012': {'8': {'28': {'595498.html': True}}}},
'economy': {'2013': {'5': {'23': {'633905.html': True}}},
'2017': {'12': {'22': {'900782.html': True}}},
'2018': {'7': {'27': {'934361.html': True},
'28': {"1111111.html"}}}},
'hournews': True
}
It is necessary to write down all the paths on the list.
In this example, it should be like this:
["about","advertise.html"]
["about","staff.html"]
["about","vacancy.html"]
["articles","2017","12","19","900588.html"]
["columns","2016","8","5","825413.html"]
["columns","2017","9","8","886260.html"]
["culture","2012","8","28","595498.html"]
["hournews"]
How can I do that?
my code:
def get_node(path,tree):
for name,val in tree.items():
if type(val) == dict:
path.append(name)
get_node(path,val)
path = path[:-1]
else:
print(path)
get_node([],tree)
it returns me something like this
['redir', '?source=vz_hour_news', 'news', '2018', '7', 'economy', '2018', '7', 'politics', '2018', '7', 'society', '2018', '7', 'world', '2018', '7', 'incidents', '2018', '6', 'opinions', '2018', '7', 'video', '2018', '6', 'photo', '2018', '7', 'vote', 'sport', '2018', '7', 'columns', '2017', '9', 'culture', '2012', '8', 'articles', '2017', '12']
but must return
["redir","?source=vz_hour_news","&id=934685","&vzurl=news/2018/7/29/934685.html"]
["redir","?source=vz_index_author", "&id=934134", "'&vzurl=opinions/2018/7/25/934134.html"]

Here is a solution using a generator: we explore the dict recursively, building the path while going down. Each time we hit a leaf of the structure, we yield the current path.
d = {'about': {'advertise.html': True, 'staff.html': True, 'vacancy.html': True},
'articles': {'2017': {'12': {'19': {'900588.html': True}}}},
'columns': {'2016': {'8': {'5': {'825413.html': True}}},
'2017': {'9': {'8': {'886260.html': True}}}},
'culture': {'2012': {'8': {'28': {'595498.html': True}}}},
'economy': {'2013': {'5': {'23': {'633905.html': True}}},
'2017': {'12': {'22': {'900782.html': True}}},
'2018': {'7': {'27': {'934361.html': True},
'28': {"1111111.html":True}}}},
'hournews': True
}
def paths(d, current_path=None):
    """Yield the list of keys leading to every leaf of a nested dict.

    A leaf is any value that is not a ``dict``. ``current_path`` holds the
    keys traversed so far; top-level callers normally leave it unset.
    """
    trail = [] if current_path is None else current_path
    if not isinstance(d, dict):
        # Reached a leaf: the accumulated keys are its complete path.
        yield trail
        return
    for name, child in d.items():
        # A new list is built per branch so sibling paths never share state.
        yield from paths(child, trail + [name])
print(list(paths(d)))
#[['about', 'advertise.html'],
# ['about', 'staff.html'],
# ['about', 'vacancy.html'],
# ['articles', '2017', '12', '19', '900588.html'],
# ['columns', '2016', '8', '5', '825413.html'],
# ['columns', '2017', '9', '8', '886260.html'],
# ['culture', '2012', '8', '28', '595498.html'],
# ['economy', '2013', '5', '23', '633905.html'],
# ['economy', '2017', '12', '22', '900782.html'],
# ['economy', '2018', '7', '27', '934361.html'],
# ['economy', '2018', '7', '28', '1111111.html'],
# ['hournews']]

Related

password generator with logging

import random, logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s')
file_handler = logging.FileHandler('student.log')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
mylist = ['Aa', 'Bb', 'Cc', 'Dd', 'Ee', 'Ff', 'Gg', 'Hh', 'Ii', 'Jj', 'Kk', 'Ll', 'Mm', 'Nn',
'Oo', 'Pp', 'Qq', 'Rr', 'Ss', 'Tt', 'Uu', 'Vv', 'Ww', 'Xx', 'Yy', 'Zz', '1', '2', '3', '4', '5', '6', '7', '8',
'9', '0', '!', '#', '#', '$', '%', '^', '&', '*', '~']
def generatePassword(num):
password = ''
for x in range(mylist):
return password
logging.debug(generatePassword,16)
When I execute the code, the compiler says that x is an unused variable. Is there a way to fix this? Also, is there any error in how I wrote the logging calls?
You are currently not using x inside your loop, hence the unused variable warning.
Regardless, consider using random.choices if you want to allow the password to contain the same character more than once, or random.sample if you don't:
import random
def generate_password(length, unique_chars_ignore_case=False):
    """Return a random password made of ``length`` characters.

    Args:
        length: Number of characters in the generated password.
        unique_chars_ignore_case: When true, no character is repeated, and
            a letter never appears in both its upper- and lower-case form.
            When false (the default), characters may repeat.

    Returns:
        The generated password as a string.

    Raises:
        ValueError: If ``unique_chars_ignore_case`` is true and ``length``
            is negative or larger than the number of pool entries.
    """
    # Each letter entry holds both cases of ONE letter, so drawing distinct
    # entries is what guarantees uniqueness when case is ignored. Every
    # entry must therefore be distinct (the symbols follow the keyboard
    # row "!@#$%^&*").
    pool = (
        'Aa', 'Bb', 'Cc', 'Dd', 'Ee', 'Ff', 'Gg', 'Hh', 'Ii', 'Jj', 'Kk', 'Ll',
        'Mm', 'Nn', 'Oo', 'Pp', 'Qq', 'Rr', 'Ss', 'Tt', 'Uu', 'Vv', 'Ww', 'Xx',
        'Yy', 'Zz', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '!', '@',
        '#', '$', '%', '^', '&', '*', '~',
    )
    # Passwords are security-sensitive, so draw from the OS entropy source
    # rather than the default, predictable Mersenne Twister generator.
    rng = random.SystemRandom()
    draw = rng.sample if unique_chars_ignore_case else rng.choices
    # rng.choice picks the case for letter entries; single-character
    # entries simply yield themselves.
    return ''.join(rng.choice(entry) for entry in draw(pool, k=length))
Example Usage allows repeats:
>>> generate_password(6)
C9#cs2
Example Usage only unique characters ignore case:
>>> generate_password(6, unique_chars_ignore_case=True)
k*065#

Extract a specific field from a list in python

I have a list in Python and would like to extract only the field countryRegion. How am I supposed to do that? The code lst['countryRegion'] produces the error: TypeError: list indices must be integers or slices, not str.
Help me please
[{'__type': 'Location:http://schemas.microsoft.com/search/local/ws/rest/v1',
'address': {'adminDistrict': 'Tamil Nadu',
'adminDistrict2': 'Chennai',
'countryRegion': 'India',
'formattedAddress': 'Chennai, Tamil Nadu',
'locality': 'Chennai'},
'bbox': [12.85071, 79.97689, 13.23403, 80.33292],
'confidence': 'High',
'entityType': 'PopulatedPlace',
'geocodePoints': [{'calculationMethod': 'None',
'coordinates': [13.07209, 80.20186],
'type': 'Point',
'usageTypes': ['Display']}],
'matchCodes': ['Good'],
'name': 'Chennai, Tamil Nadu',
'point': {'coordinates': [13.07209, 80.20186], 'type': 'Point'}}]
Given your input
lst = [{'__type': 'Location:http://schemas.microsoft.com/search/local/ws/rest/v1',
'address': {'adminDistrict': 'Tamil Nadu',
'adminDistrict2': 'Chennai',
'countryRegion': 'India',
'formattedAddress': 'Chennai, Tamil Nadu',
'locality': 'Chennai'},
'bbox': [12.85071, 79.97689, 13.23403, 80.33292],
'confidence': 'High',
'entityType': 'PopulatedPlace',
'geocodePoints': [{'calculationMethod': 'None',
'coordinates': [13.07209, 80.20186],
'type': 'Point',
'usageTypes': ['Display']}],
'matchCodes': ['Good'],
'name': 'Chennai, Tamil Nadu',
'point': {'coordinates': [13.07209, 80.20186], 'type': 'Point'}}]
you get your data from
lst[0]['address']['countryRegion']
First, TypeError is due to lst being a list type and not Dictionary type
Second, countryRegion is not a key of your dictionary, but inside the dictionary value from key address
It is producing that error because of the list containing a single element that is of type dictionary.
so to access the desired value you can just use the command
list_dic = [{'__type': 'Location:http://schemas.microsoft.com/search/local/ws/rest/v1',
'address': {'adminDistrict': 'Tamil Nadu',
'adminDistrict2': 'Chennai',
'countryRegion': 'India',
'formattedAddress': 'Chennai, Tamil Nadu',
'locality': 'Chennai'},
'bbox': [12.85071, 79.97689, 13.23403, 80.33292],
'confidence': 'High',
'entityType': 'PopulatedPlace',
'geocodePoints': [{'calculationMethod': 'None',
'coordinates': [13.07209, 80.20186],
'type': 'Point',
'usageTypes': ['Display']}],
'matchCodes': ['Good'],
'name': 'Chennai, Tamil Nadu',
'point': {'coordinates': [13.07209, 80.20186], 'type': 'Point'}}]
# The list holds a single dict: index into the list first, then follow the
# nested keys. The name must match the list_dic variable assigned above.
list_dic[0]['address']['countryRegion']
That's because it is a list of dictionary.
I don't know how you obtain the JSON, but here's what I was able to get:
js=[{'__type': 'Location:http://schemas.microsoft.com/search/local/ws/rest/v1',
'address': {'adminDistrict': 'Tamil Nadu',
'adminDistrict2': 'Chennai',
'countryRegion': 'India',
'formattedAddress': 'Chennai, Tamil Nadu',
'locality': 'Chennai'},
'bbox': [12.85071, 79.97689, 13.23403, 80.33292],
'confidence': 'High',
'entityType': 'PopulatedPlace',
'geocodePoints': [{'calculationMethod': 'None',
'coordinates': [13.07209, 80.20186],
'type': 'Point',
'usageTypes': ['Display']}],
'matchCodes': ['Good'],
'name': 'Chennai, Tamil Nadu',
'point': {'coordinates': [13.07209, 80.20186], 'type': 'Point'}}]
# Print the country/region of every result in the list.
for entry in js:
    print(entry["address"]["countryRegion"])

AWK + gsub - how to round floating number

Do you have an idea of how I can round float numbers after multiplying?
I have the following SQL dump:
INSERT INTO
`honzavolfcz_product` (`product_id`, `feed_product_id`, `import_id`,
`import_active_product`, `model`, `sku`, `upc`, `ean`, `jan`, `isbn`, `mpn`,
`location`, `quantity`, `stock_status_id`, `product_status_id`, `image`,
`manufacturer_id`, `shipping`, `price`, `points`, `tax_class_id`,
`date_available`, `weight`, `weight_class_id`, `length`, `width`, `height`,
`length_class_id`, `subtract`, `minimum`, `sort_order`, `status`, `date_added`,
`date_modified`, `viewed`)
VALUES ('10', '0', '1',
'1', 'model', '', '', '', '', '', '',
'', '1', '1', '0', 'catalog/zbozi/bozi_laska_obal.jpg',
'0', '1', '**112.50**', '0', '1',
'2019-01-15', '0.00', '1', '0.00', '0.00', '0.00',
'1', '0', '1', '0', '1', '2019-02-15 16:16:29',
'2019-02-15 16:16:29', '293');
I want to multiply the price value (112.50) by 1.21 (taxes) and then round the result up or down to the nearest integer. I wrote the following command, which does the multiplication, but I do not know how to round the result:
awk '{a=substr($58,2,length($58)-3);gsub(a,a*1.21);print}' a > b
The result:
INSERT INTO
`honzavolfcz_product` (`product_id`, `feed_product_id`, `import_id`,
`import_active_product`, `model`, `sku`, `upc`, `ean`, `jan`, `isbn`, `mpn`,
`location`, `quantity`, `stock_status_id`, `product_status_id`, `image`,
`manufacturer_id`, `shipping`, `price`, `points`, `tax_class_id`,
`date_available`, `weight`, `weight_class_id`, `length`, `width`, `height`,
`length_class_id`, `subtract`, `minimum`, `sort_order`, `status`, `date_added`,
`date_modified`, `viewed`)
VALUES ('10', '0', '1',
'1', 'model', '', '', '', '', '', '',
'', '1', '1', '0', 'catalog/zbozi/bozi_laska_obal.jpg',
'0', '1', '**136.125**', '0', '1',
'2019-01-15', '0.00', '1', '0.00', '0.00', '0.00',
'1', '0', '1', '0', '1', '2019-02-15 16:16:29',
'2019-02-15 16:16:29', '293');
I would like to have there 136 instead of 136.125. Of course, 137 if it would be 136.555.
Thank you in advance.
This may be what you want:
# %.0f rounds to the nearest integer; %d would only drop the fractional part.
$ awk '{a=substr($58,2); $58=sprintf("\047%.0f\047,",a*1.21)} 1' file
INSERT INTO honzavolfcz_product (product_id, feed_product_id, import_id, import_active_product, model, sku, upc, ean, jan, isbn, mpn, location, quantity, stock_status_id, product_status_id, image, manufacturer_id, shipping, price, points, tax_class_id, date_available, weight, weight_class_id, length, width, height, length_class_id, subtract, minimum, sort_order, status, date_added, date_modified, viewed) VALUES ('10', '0', '1', '1', 'model', '', '', '', '', '', '', '', '1', '1', '0', 'catalog/zbozi/bozi_laska_obal.jpg', '0', '1', '136', '0', '1', '2019-01-15', '0.00', '1', '0.00', '0.00', '0.00', '1', '0', '1', '0', '1', '2019-02-15 16:16:29', '2019-02-15 16:16:29', '293');
but the rounding probably won't go quite as you'd like by default. See https://www.gnu.org/software/gawk/manual/gawk.html#Round-Function and https://www.gnu.org/software/gawk/manual/gawk.html#Setting-the-rounding-mode for how to control it with GNU awk.

How do I know which topic this word comes in?

This code works fine but I want to know the topic name instead of Topic: 0 and Topic:1, How do i know which topic this word comes in?
# Each item pairs a topic id with its (word, weight) entries; only the word
# of each entry is printed. The id must be read from the loop variable.
for index, topic in lda_model.show_topics(formatted=False, num_words= 30):
    print('Topic: {} \nWords: {}'.format(index, [w[0] for w in topic]))
This is the output:
Topic: 0
Words: ['associate', 'incident', 'time', 'task', 'pain', 'amcare', 'work', 'ppe', 'train', 'proper', 'report', 'standard', 'pmv', 'level', 'perform', 'wear', 'date', 'factor', 'overtime', 'location', 'area', 'yes', 'new', 'treatment', 'start', 'stretch', 'assign', 'condition', 'participate', 'environmental']
Topic: 1
Words: ['work', 'associate', 'cage', 'aid', 'shift', 'leave', 'area', 'eye', 'incident', 'aider', 'hit', 'pit', 'manager', 'return', 'start', 'continue', 'pick', 'call', 'come', 'right', 'take', 'report', 'lead', 'break', 'paramedic', 'receive', 'get', 'inform', 'room', 'head']
I want "Topic Name" instead of Topic : 0
Topic: 0
Words: ['associate', 'incident', 'time', 'task', 'pain', 'amcare', 'work', 'ppe', 'train', 'proper', 'report', 'standard', 'pmv', 'level', 'perform', 'wear', 'date', 'factor', 'overtime', 'location', 'area', 'yes', 'new', 'treatment', 'start', 'stretch', 'assign', 'condition', 'participate', 'environmental']
Topic: 1
Words: ['work', 'associate', 'cage', 'aid', 'shift', 'leave', 'area', 'eye', 'incident', 'aider', 'hit', 'pit', 'manager', 'return', 'start', 'continue', 'pick', 'call', 'come', 'right', 'take', 'report', 'lead', 'break', 'paramedic', 'receive', 'get', 'inform', 'room', 'head']
This might work (Untested)
for index, topic in lda_model.show_topics(formatted=False, num_words= 30):
print('Topic: {} \nWords: {}'.format(lda_model.print_topic(index), [w[0] for w in topic]))
Try changing the Formatted parameter to True like this:
for index, topic in lda_model.show_topics(formatted=True, num_words= 30):
print('Topic: {} \nWords: {}'.format(topic[0], [w[0] for w in topic[1]]))
You can also check out the documentation for more information:
https://radimrehurek.com/gensim/models/ldamodel.html

Select first element for one sublist in a 2d list python

QUAKE_DATA = [
['2017-11-16T18:42:11.676Z', '61.7647', '-153.9615', '0.8', '2.1', 'ml',
'', '', '', '0.64', 'ak', 'ak17253456',
'2017-11-16T18:58:24.707Z', '156km NNW of Redoubt Volcano, Alaska', 'earthquake',
'', '0.2', '', '', 'automatic', 'ak', 'ak'],
['2017-11-16T18:35:00.940Z', '34.1638333', '-116.4253333', '10.17', '1.76', 'ml',
'58', '33', '0.03663', '0.17', 'ci', 'ci37812975',
'2017-11-16T19:14:13.440Z', '6km N of Yucca Valley, CA', 'earthquake',
'0.14', '0.32', '0.18', '50', 'reviewed', 'ci', 'ci'],
['2017-11-16T18:06:15.460Z', '34.0181667', '-116.862', '17.3', '0.9', 'ml',
'23', '108', '0.04811', '0.12', 'ci', 'ci37812967',
'2017-11-16T19:23:12.335Z', '10km N of Banning, CA', 'earthquake',
'0.23', '0.61', '0.068', '13', 'reviewed', 'ci', 'ci'],
['2017-11-16T17:59:31.810Z', '34.1671667', '-116.4225', '10.6', '1.08', 'ml',
'33', '61', '0.03261', '0.17', 'ci', 'ci37812951',
'2017-11-16T18:57:01.554Z', '6km N of Yucca Valley, CA', 'earthquake',
'0.25', '0.37', '0.169', '13', 'reviewed', 'ci', 'ci'],
['2017-11-16T17:47:50.270Z', '37.7361679', '-122.1466675', '4.09', '1.52', 'md',
'12', '126', '0.0248', '0.04', 'nc', 'nc72925680',
'2017-11-16T18:34:02.533Z', '1km NNE of San Leandro, California', 'earthquake',
'0.25', '0.29', '0.13', '8', 'automatic', 'nc', 'nc'],
['2017-11-16T17:44:51.030Z', '37.5636673', '-118.8346634', '1.8', '1.66', 'md',
'16', '196', '0.02668', '0.04', 'nc', 'nc72925675',
'2017-11-16T18:23:03.511Z', '15km SE of Mammoth Lakes, California', 'earthquake',
'0.63', '0.43', '0.25', '13', 'automatic', 'nc', 'nc'],
['2017-11-16T17:34:22.310Z', '33.9796667', '-118.782', '14.78', '2.47', 'ml',
'41', '97', '0.06482', '0.25', 'ci', 'ci37812839',
'2017-11-16T19:11:53.824Z', '4km SE of Malibu, CA', 'earthquake',
'0.36', '0.68', '0.13', '94', 'reviewed', 'ci', 'ci']
]
for data in QUAKE_DATA:
print (data[0])
result I am getting:
2017-11-16T18:42:11.676Z
2017-11-16T18:35:00.940Z
2017-11-16T18:06:15.460Z
2017-11-16T17:59:31.810Z
2017-11-16T17:47:50.270Z
2017-11-16T17:44:51.030Z
2017-11-16T17:34:22.310Z
If you want the first element from the first sub-list, just take the first sub-list with:
QUAKE_DATA[0]
and then take the first element from that sub-list by indexing again:
QUAKE_DATA[0][0]
Simple as that, giving:
'2017-11-16T18:42:11.676Z'
There is no need for a for-loop as you just want to get one element which you can index directly. As it is, you are looping through every list in QUAKE_DATA and printing the first item from that list.

Resources