Select first element for one sublist in a 2d list python - python-3.x

QUAKE_DATA = [
['2017-11-16T18:42:11.676Z', '61.7647', '-153.9615', '0.8', '2.1', 'ml',
'', '', '', '0.64', 'ak', 'ak17253456',
'2017-11-16T18:58:24.707Z', '156km NNW of Redoubt Volcano, Alaska', 'earthquake',
'', '0.2', '', '', 'automatic', 'ak', 'ak'],
['2017-11-16T18:35:00.940Z', '34.1638333', '-116.4253333', '10.17', '1.76', 'ml',
'58', '33', '0.03663', '0.17', 'ci', 'ci37812975',
'2017-11-16T19:14:13.440Z', '6km N of Yucca Valley, CA', 'earthquake',
'0.14', '0.32', '0.18', '50', 'reviewed', 'ci', 'ci'],
['2017-11-16T18:06:15.460Z', '34.0181667', '-116.862', '17.3', '0.9', 'ml',
'23', '108', '0.04811', '0.12', 'ci', 'ci37812967',
'2017-11-16T19:23:12.335Z', '10km N of Banning, CA', 'earthquake',
'0.23', '0.61', '0.068', '13', 'reviewed', 'ci', 'ci'],
['2017-11-16T17:59:31.810Z', '34.1671667', '-116.4225', '10.6', '1.08', 'ml',
'33', '61', '0.03261', '0.17', 'ci', 'ci37812951',
'2017-11-16T18:57:01.554Z', '6km N of Yucca Valley, CA', 'earthquake',
'0.25', '0.37', '0.169', '13', 'reviewed', 'ci', 'ci'],
['2017-11-16T17:47:50.270Z', '37.7361679', '-122.1466675', '4.09', '1.52', 'md',
'12', '126', '0.0248', '0.04', 'nc', 'nc72925680',
'2017-11-16T18:34:02.533Z', '1km NNE of San Leandro, California', 'earthquake',
'0.25', '0.29', '0.13', '8', 'automatic', 'nc', 'nc'],
['2017-11-16T17:44:51.030Z', '37.5636673', '-118.8346634', '1.8', '1.66', 'md',
'16', '196', '0.02668', '0.04', 'nc', 'nc72925675',
'2017-11-16T18:23:03.511Z', '15km SE of Mammoth Lakes, California', 'earthquake',
'0.63', '0.43', '0.25', '13', 'automatic', 'nc', 'nc'],
['2017-11-16T17:34:22.310Z', '33.9796667', '-118.782', '14.78', '2.47', 'ml',
'41', '97', '0.06482', '0.25', 'ci', 'ci37812839',
'2017-11-16T19:11:53.824Z', '4km SE of Malibu, CA', 'earthquake',
'0.36', '0.68', '0.13', '94', 'reviewed', 'ci', 'ci']
]
for data in QUAKE_DATA:
    print(data[0])
The result I am getting:
2017-11-16T18:42:11.676Z
2017-11-16T18:35:00.940Z
2017-11-16T18:06:15.460Z
2017-11-16T17:59:31.810Z
2017-11-16T17:47:50.270Z
2017-11-16T17:44:51.030Z
2017-11-16T17:34:22.310Z

If you want the first element from the first sub-list, just take the first sub-list with:
QUAKE_DATA[0]
and then take the first element from that sub-list by indexing again:
QUAKE_DATA[0][0]
Simple as that, giving:
'2017-11-16T18:42:11.676Z'
There is no need for a for loop when you just want one element, which you can index directly. As written, your code loops through every sub-list in QUAKE_DATA and prints the first item of each.
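If instead you want the first element of every sub-list, a list comprehension collects them in one pass (a minimal sketch using the data above):

first_times = [row[0] for row in QUAKE_DATA]  # first field of each sub-list
print(first_times[0])  # '2017-11-16T18:42:11.676Z'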

Related

password generator with logging

import random, logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s')
file_handler = logging.FileHandler('student.log')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
mylist = ['Aa', 'Bb', 'Cc', 'Dd', 'Ee', 'Ff', 'Gg', 'Hh', 'Ii', 'Jj', 'Kk', 'Ll', 'Mm', 'Nn',
'Oo', 'Pp', 'Qq', 'Rr', 'Ss', 'Tt', 'Uu', 'Vv', 'Ww', 'Xx', 'Yy', 'Zz', '1', '2', '3', '4', '5', '6', '7', '8',
'9', '0', '!', '#', '#', '$', '%', '^', '&', '*', '~']
def generatePassword(num):
    password = ''
    for x in range(mylist):
        return password

logging.debug(generatePassword,16)
When I execute the code, the compiler says that x is an unused variable. Is there a way to fix this? Also, is there any error in how I wrote the logging functions?
You are currently not using x inside your loop, hence the unused variable warning.
Regardless, consider using random.choices if you want to allow the password to possibly contain the same character twice, or random.sample if you don't:
import random

def generate_password(length, unique_chars_ignore_case=False):
    my_list = [
        'Aa', 'Bb', 'Cc', 'Dd', 'Ee', 'Ff', 'Gg', 'Hh', 'Ii', 'Jj', 'Kk', 'Ll',
        'Mm', 'Nn', 'Oo', 'Pp', 'Qq', 'Rr', 'Ss', 'Tt', 'Uu', 'Vv', 'Ww', 'Xx',
        'Yy', 'Zz', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '!', '#',
        '#', '$', '%', '^', '&', '*', '~'
    ]
    # choices allows repeats; sample draws each list entry at most once
    random_func = random.choices if not unique_chars_ignore_case else random.sample
    return ''.join([
        # two-character entries like 'Aa' contribute one of their two cases
        x if len(x) == 1 else x[random.randint(0, 1)]
        for x in random_func(my_list, k=length)
    ])
Example usage, allowing repeats:
>>> generate_password(6)
C9#cs2
Example usage, unique characters only (ignoring case):
>>> generate_password(6, unique_chars_ignore_case=True)
k*065#
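As for the logging part of your question: logging.debug(generatePassword,16) has two problems. It calls debug on the root logger rather than on the logger object you configured (so nothing reaches student.log), and it passes the function object itself instead of calling it. A minimal sketch of what was probably intended, using the logger configured above and the generate_password from this answer:

# call the function, then log its return value through the configured logger
logger.debug(generate_password(16))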

AWK + gsub - how to round floating number

Do you have an idea of how I can round float numbers after multiplying?
I have the following SQL dump:
INSERT INTO
`honzavolfcz_product` (`product_id`, `feed_product_id`, `import_id`,
`import_active_product`, `model`, `sku`, `upc`, `ean`, `jan`, `isbn`, `mpn`,
`location`, `quantity`, `stock_status_id`, `product_status_id`, `image`,
`manufacturer_id`, `shipping`, `price`, `points`, `tax_class_id`,
`date_available`, `weight`, `weight_class_id`, `length`, `width`, `height`,
`length_class_id`, `subtract`, `minimum`, `sort_order`, `status`, `date_added`,
`date_modified`, `viewed`)
VALUES ('10', '0', '1',
'1', 'model', '', '', '', '', '', '',
'', '1', '1', '0', 'catalog/zbozi/bozi_laska_obal.jpg',
'0', '1', '**112.50**', '0', '1',
'2019-01-15', '0.00', '1', '0.00', '0.00', '0.00',
'1', '0', '1', '0', '1', '2019-02-15 16:16:29',
'2019-02-15 16:16:29', '293');
And I want to multiply the price value (112.50) by 1.21 (tax) and then round up or down. I wrote the following command, which does the multiplication, but I do not know how to round the result:
awk '{a=substr($58,2,length($58)-3);gsub(a,a*1.21);print}' a > b
The result:
INSERT INTO
`honzavolfcz_product` (`product_id`, `feed_product_id`, `import_id`,
`import_active_product`, `model`, `sku`, `upc`, `ean`, `jan`, `isbn`, `mpn`,
`location`, `quantity`, `stock_status_id`, `product_status_id`, `image`,
`manufacturer_id`, `shipping`, `price`, `points`, `tax_class_id`,
`date_available`, `weight`, `weight_class_id`, `length`, `width`, `height`,
`length_class_id`, `subtract`, `minimum`, `sort_order`, `status`, `date_added`,
`date_modified`, `viewed`)
VALUES ('10', '0', '1',
'1', 'model', '', '', '', '', '', '',
'', '1', '1', '0', 'catalog/zbozi/bozi_laska_obal.jpg',
'0', '1', '**136.125**', '0', '1',
'2019-01-15', '0.00', '1', '0.00', '0.00', '0.00',
'1', '0', '1', '0', '1', '2019-02-15 16:16:29',
'2019-02-15 16:16:29', '293');
I would like to have 136 there instead of 136.125, and of course 137 if the value were 136.555.
Thank you in advance.
This may be what you want:
$ awk '{a=substr($58,2); $58=sprintf("\047%d\047,",a*1.21)} 1' file
INSERT INTO honzavolfcz_product (product_id, feed_product_id, import_id, import_active_product, model, sku, upc, ean, jan, isbn, mpn, location, quantity, stock_status_id, product_status_id, image, manufacturer_id, shipping, price, points, tax_class_id, date_available, weight, weight_class_id, length, width, height, length_class_id, subtract, minimum, sort_order, status, date_added, date_modified, viewed) VALUES ('10', '0', '1', '1', 'model', '', '', '', '', '', '', '', '1', '1', '0', 'catalog/zbozi/bozi_laska_obal.jpg', '0', '1', '136', '0', '1', '2019-01-15', '0.00', '1', '0.00', '0.00', '0.00', '1', '0', '1', '0', '1', '2019-02-15 16:16:29', '2019-02-15 16:16:29', '293');
but the rounding probably won't go quite as you'd like by default. See https://www.gnu.org/software/gawk/manual/gawk.html#Round-Function and https://www.gnu.org/software/gawk/manual/gawk.html#Setting-the-rounding-mode for how to control it with GNU awk.
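If you want plain round-half-up behavior for these positive prices instead, one common awk idiom (a sketch in the spirit of the manual's round function, untested against your full dump) is to add 0.5 before the integer conversion truncates:

$ awk '{a=substr($58,2); $58=sprintf("\047%d\047,", a*1.21 + 0.5)} 1' file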

How to save spark dataframe with different table name on each iteration using saveAsTable in pyspark

Platform: RHEL 7, Cloudera CDH 6.2 Hadoop distribution, pyspark 3.7.1
What I tried: I could write a table to the Hive warehouse when I explicitly mention the table name as saveAsTable("tablename"). But I get the error below when I try to take the table name from a Python variable in a for loop, as shown here.
Similar to: How to save a dataframe result in hive table with different name on each iteration using pyspark
prefix_list = ["hive_table_name1", "hive_table_name2", "hive_table_name3"]
list1 = ["dataframe_content_1", "dataframe_content__2", "dataframe_content_3"]

for index, l in enumerate(list1):
    selecteddata = df.select(l)
    # Embedding table name within quotations
    tablename = '"' + prefix_list[index] + '"'
    # write the "selecteddata" dataframe to hive table
    selecteddata.write.mode("overwrite").saveAsTable(tablename)
Expected: 3 different hive tables in default hive warehouse
Actual:
"ReturnMessages"
Traceback (most recent call last):
File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o86.saveAsTable.
: org.apache.spark.sql.catalyst.parser.ParseException:
mismatched input '"ReturnMessages"' expecting {'SELECT', 'FROM', 'ADD', 'AS', 'ALL', 'ANY', 'DISTINCT', 'WHERE', 'GROUP', 'BY', ..., IDENTIFIER, BACKQUOTED_IDENTIFIER}(line 1, pos 0)
== SQL ==
"ReturnMessages"
^^^
at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:241)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:117)
at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parseTableIdentifier(ParseDriver.scala:49)
at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:400)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/37Pro/files/Partnerdaten.py", line 144, in <module>
dataframe.write.saveAsTable(filename, format="parquet", mode="overwrite")
File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 775, in saveAsTable
File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 73, in deco
pyspark.sql.utils.ParseException: '\nmismatched input \'"ReturnMessages"\' expecting {\'SELECT\', \'FROM\', \'ADD\', \'AS\', ...
The parse error comes from the quotation marks you embed in the table name: tablename = '"' + prefix_list[index] + '"' makes Spark parse "ReturnMessages" (quotes included), which is not a valid table identifier, as the mismatched input message shows. Pass the plain name, and ideally qualify it with the database name.
Here is how I would do what you are trying to do:
database_name = "my_database"
prefix_list = ["hive_table_name1", "hive_table_name2", "hive_table_name3"]
list1 = ["dataframe_content_1", "dataframe_content_2", "dataframe_content_3"]

for index, l in enumerate(list1):
    selecteddata = df.select(l)
    # plain table name, no embedded quotation marks
    tablename = prefix_list[index]
    # qualify with the database so the table lands in the right place
    db_name_and_corresponding_table = f"{database_name}.{tablename}"
    # write the "selecteddata" dataframe to a hive table
    selecteddata.write.mode("overwrite").saveAsTable(db_name_and_corresponding_table)
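To confirm the tables landed where you expect, you can list them afterwards (a quick sanity check, assuming your SparkSession is named spark):

# list the tables now present in the target database
for t in spark.catalog.listTables(database_name):
    print(t.name)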
Hope that helps.

How do I know which topic this word comes in?

This code works fine, but I want to know the topic name instead of Topic: 0 and Topic: 1. How do I know which topic a word comes from?
for index, topic in lda_model.show_topics(formatted=False, num_words=30):
    print('Topic: {} \nWords: {}'.format(index, [w[0] for w in topic]))
This is the output:
Topic: 0
Words: ['associate', 'incident', 'time', 'task', 'pain', 'amcare', 'work', 'ppe', 'train', 'proper', 'report', 'standard', 'pmv', 'level', 'perform', 'wear', 'date', 'factor', 'overtime', 'location', 'area', 'yes', 'new', 'treatment', 'start', 'stretch', 'assign', 'condition', 'participate', 'environmental']
Topic: 1
Words: ['work', 'associate', 'cage', 'aid', 'shift', 'leave', 'area', 'eye', 'incident', 'aider', 'hit', 'pit', 'manager', 'return', 'start', 'continue', 'pick', 'call', 'come', 'right', 'take', 'report', 'lead', 'break', 'paramedic', 'receive', 'get', 'inform', 'room', 'head']
I want a "Topic Name" instead of Topic: 0.
This might work (untested):
for index, topic in lda_model.show_topics(formatted=False, num_words=30):
    print('Topic: {} \nWords: {}'.format(lda_model.print_topic(index), [w[0] for w in topic]))
Try changing the formatted parameter to True like this:
for index, topic in lda_model.show_topics(formatted=True, num_words=30):
    # with formatted=True each topic arrives as one preformatted string of words
    print('Topic: {} \nWords: {}'.format(index, topic))
You can also check out the documentation for more information:
https://radimrehurek.com/gensim/models/ldamodel.html
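Note that LDA topics have no intrinsic names; the model only numbers them. If you want human-readable names, you have to assign them yourself after inspecting the top words, e.g. with a plain dict (the labels below are invented for illustration):

# hypothetical hand-picked labels, keyed by topic id
topic_names = {0: 'incident reports', 1: 'first aid and shift events'}
for index, topic in lda_model.show_topics(formatted=False, num_words=30):
    print('Topic: {} \nWords: {}'.format(topic_names.get(index, index), [w[0] for w in topic]))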

Create a list from a dictionary

I have a dictionary
{'about': {'advertise.html': True, 'staff.html': True, 'vacancy.html': True},
'articles': {'2017': {'12': {'19': {'900588.html': True}}}},
'columns': {'2016': {'8': {'5': {'825413.html': True}}},
'2017': {'9': {'8': {'886260.html': True}}}},
'culture': {'2012': {'8': {'28': {'595498.html': True}}}},
'economy': {'2013': {'5': {'23': {'633905.html': True}}},
'2017': {'12': {'22': {'900782.html': True}}},
'2018': {'7': {'27': {'934361.html': True},
'28': {"1111111.html": True}}}},
'hournews': True
}
I need to write out every path as a list.
In this example, it should be like this:
["about","advertise.html"]
["about","staff.html"]
["about","vacancy.html"]
["articles","2017","12","19","900588.html"]
["columns","2016","8","5","825413.html"]
["columns","2017","9","8","886260.html"]
["culture","2012","8","28","595498.html"]
["hournews"]
How can I do that?
My code:
def get_node(path, tree):
    for name, val in tree.items():
        if type(val) == dict:
            path.append(name)
            get_node(path, val)
            path = path[:-1]
        else:
            print(path)

get_node([], tree)
It returns something like this:
['redir', '?source=vz_hour_news', 'news', '2018', '7', 'economy', '2018', '7', 'politics', '2018', '7', 'society', '2018', '7', 'world', '2018', '7', 'incidents', '2018', '6', 'opinions', '2018', '7', 'video', '2018', '6', 'photo', '2018', '7', 'vote', 'sport', '2018', '7', 'columns', '2017', '9', 'culture', '2012', '8', 'articles', '2017', '12']
but it should return:
["redir","?source=vz_hour_news","&id=934685","&vzurl=news/2018/7/29/934685.html"]
["redir","?source=vz_index_author", "&id=934134", "'&vzurl=opinions/2018/7/25/934134.html"]
Your get_node fails because path.append(name) mutates the single list that every recursive call shares, while path = path[:-1] only rebinds the local name without undoing the appends made deeper in the recursion. Here is a solution using a generator: we explore the dict recursively, building a fresh path on the way down (current_path + [key]) so nothing is shared or mutated. Each time we hit a leaf of the structure, we yield the current path.
d = {'about': {'advertise.html': True, 'staff.html': True, 'vacancy.html': True},
'articles': {'2017': {'12': {'19': {'900588.html': True}}}},
'columns': {'2016': {'8': {'5': {'825413.html': True}}},
'2017': {'9': {'8': {'886260.html': True}}}},
'culture': {'2012': {'8': {'28': {'595498.html': True}}}},
'economy': {'2013': {'5': {'23': {'633905.html': True}}},
'2017': {'12': {'22': {'900782.html': True}}},
'2018': {'7': {'27': {'934361.html': True},
'28': {"1111111.html":True}}}},
'hournews': True
}
def paths(d, current_path=None):
    if current_path is None:
        current_path = []
    if isinstance(d, dict):
        for key, value in d.items():
            yield from paths(value, current_path + [key])
    else:
        yield current_path

print(list(paths(d)))
#[['about', 'advertise.html'],
# ['about', 'staff.html'],
# ['about', 'vacancy.html'],
# ['articles', '2017', '12', '19', '900588.html'],
# ['columns', '2016', '8', '5', '825413.html'],
# ['columns', '2017', '9', '8', '886260.html'],
# ['culture', '2012', '8', '28', '595498.html'],
# ['economy', '2013', '5', '23', '633905.html'],
# ['economy', '2017', '12', '22', '900782.html'],
# ['economy', '2018', '7', '27', '934361.html'],
# ['economy', '2018', '7', '28', '1111111.html'],
# ['hournews']]
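If you need each path as a single string rather than a list (for example to rebuild URLs), joining is straightforward:

# join each path back into a slash-separated string
for p in paths(d):
    print('/'.join(p))
# about/advertise.html
# about/staff.html
# ...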
