Spark cannot serialise a recursive function, giving PicklingError - apache-spark

I am writing a PySpark program that contains a recursive function. When I execute this program, I get the error below:
Traceback (most recent call last):
File "/Applications/PyCharm CE.app/Contents/helpers/pydev/pydevd.py", line 1578, in <module>
globals = debugger.run(setup['file'], None, None, is_module)
File "/Applications/PyCharm CE.app/Contents/helpers/pydev/pydevd.py", line 1015, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "/Users/Documents/repos/
File "/Users/Documents/repos/main.py", line 62, in main
run_by_date(dt.datetime.today() - dt.timedelta(days=1))
File "/Users/Documents/repos/main.py", line 50, in run_by_date
parsed_rdd.repartition(1).saveAsTextFile(save_path)
File "/Users/Documents/tools/spark-2.1.0/python/pyspark/rdd.py", line 2058, in repartition
return self.coalesce(numPartitions, shuffle=True)
File "/Users/Documents/tools/spark-2.1.0/python/pyspark/rdd.py", line 2075, in coalesce
jrdd = selfCopy._jrdd.coalesce(numPartitions, shuffle)
File "/Users/Documents/tools/spark-2.1.0/python/pyspark/rdd.py", line 2439, in _jrdd
self._jrdd_deserializer, profiler)
File "/Users/Documents/tools/spark-2.1.0/python/pyspark/rdd.py", line 2372, in _wrap_function
pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
File "/Users/Documents/tools/spark-2.1.0/python/pyspark/rdd.py", line 2358, in _prepare_for_python_RDD
pickled_command = ser.dumps(command)
File "/Users/Documents/tools/spark-2.1.0/python/pyspark/serializers.py", line 440, in dumps
return cloudpickle.dumps(obj, 2)
File "/Users/Documents/tools/spark-2.1.0/python/pyspark/cloudpickle.py", line 667, in dumps
cp.dump(obj)
File "/Users/Documents/tools/spark-2.1.0/python/pyspark/cloudpickle.py", line 111, in dump
raise pickle.PicklingError(msg)
pickle.PicklingError: Could not pickle object as excessively deep recursion required.
I understand this might be because the recursion reaches a huge depth when it is serialised. How is this problem usually handled?
Thank you so much.
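One usual way to handle this is to raise Python's recursion limit before the action that triggers serialisation, since cloudpickle walks the function's object graph recursively and can hit the interpreter's limit. A minimal sketch, assuming the default limit of 1000 is what ser.dumps is running into (10000 is an illustrative value, not a recommendation):

import sys

# raise the interpreter's recursion limit so cloudpickle can finish
# walking the closure; the default is 1000
sys.setrecursionlimit(10000)

parsed_rdd.repartition(1).saveAsTextFile(save_path)

If the recursive function itself is the deep part, rewriting it iteratively with an explicit stack removes the need for a higher limit altogether.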

Related

Error when using joblib in python with undetected chromedriver

When I use (self.links is an array of strings)
Parallel(n_jobs=2)(delayed(self.buybysize)(link) for link in self.links)
with this function
def buybysize(self, link):
    browser = self.browser()
    # other commented stuff

def browser(self):
    options = uc.ChromeOptions()
    options.user_data_dir = self.user_data_dir
    options.add_argument(self.add_argument)
    driver = uc.Chrome(options=options)
    return driver
I get the error:
joblib.externals.loky.process_executor._RemoteTraceback:
Traceback (most recent call last):
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 436, in _process_worker
r = call_item()
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 288, in __call__
return self.fn(*self.args, **self.kwargs)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 595, in __call__
return self.func(*args, **kwargs)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/parallel.py", line 262, in __call__
return [func(*args, **kwargs)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/parallel.py", line 262, in <listcomp>
return [func(*args, **kwargs)
File "/home/Me/PycharmProjects/zalando_buy/Zalando.py", line 91, in buybysize
browser = self.browser()
File "/home/Me/PycharmProjects/zalando_buy/Zalando.py", line 38, in browser
driver = uc.Chrome(options=options)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/undetected_chromedriver/__init__.py", line 388, in __init__
self.browser_pid = start_detached(
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/undetected_chromedriver/dprocess.py", line 30, in start_detached
multiprocessing.Process(
File "/usr/lib/python3.8/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/usr/lib/python3.8/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/backend/process.py", line 39, in _Popen
return Popen(process_obj)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/backend/popen_loky_posix.py", line 52, in __init__
self._launch(process_obj)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/externals/loky/backend/popen_loky_posix.py", line 157, in _launch
pid = fork_exec(cmd_python, self._fds, env=process_obj.env)
AttributeError: 'Process' object has no attribute 'env'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/Me/PycharmProjects/zalando_buy/Start.py", line 4, in <module>
class Start:
File "/home/Me/PycharmProjects/zalando_buy/Start.py", line 7, in Start
zalando.startshopping()
File "/home/Me/PycharmProjects/zalando_buy/Zalando.py", line 42, in startshopping
self.openlinks()
File "/home/Me/PycharmProjects/zalando_buy/Zalando.py", line 50, in openlinks
Parallel(n_jobs=2)(delayed(self.buybysize)(link) for link in self.links)
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/parallel.py", line 1056, in __call__
self.retrieve()
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/parallel.py", line 935, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/home/Me/PycharmProjects/zalando_buy/venv/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 542, in wrap_future_result
return future.result(timeout=timeout)
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
AttributeError: 'Process' object has no attribute 'env'
Process finished with exit code 1
To me it looks like there are instabilities because undetected chromedriver may already use multiprocessing itself, but isn't there any way I can open multiple browsers with UC and process each iteration in parallel?
Edit: I debugged, and the error appears after trying to execute this line:
driver = uc.Chrome(options=options)
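Since the traceback shows the failure inside loky's fork_exec while undetected_chromedriver is itself calling multiprocessing.Process, one workaround worth trying is joblib's threading backend, which runs the jobs in threads instead of spawning worker processes. A hedged sketch, assuming the Selenium calls spend most of their time waiting on the browser:

from joblib import Parallel, delayed

# threads instead of loky worker processes, so undetected_chromedriver's
# own use of multiprocessing does not clash with the spawned workers
Parallel(n_jobs=2, backend="threading")(
    delayed(self.buybysize)(link) for link in self.links
)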

Write Shapefile to AWS S3 with geopandas in Glue Python Shell

I have successfully read a zipped shapefile from my S3 bucket through geopandas, but I get an error when trying to output the same GeoDataFrame as a shapefile to the same S3 bucket.
The code below is how I read the zip file, and it works nicely:
import boto3
import geopandas

## session for connecting to S3
session = boto3.session.Session(aws_access_key_id='MY-KEY-ID',
                                aws_secret_access_key='MY-KEY')
s3 = session.resource('s3')
bucket = s3.Bucket('my_bucket')

## read shapefile
TPG = bucket.Object(key='/shapefiles/grid.zip')
TPGrid = geopandas.read_file(TPG.get()['Body'])
But when I tried to output the same geodataframe like this:
TPGrid.to_file(filename='s3://my_bucket/output/TPGrid.zip', driver='ESRI Shapefile')
I will get error code:
ERROR:fiona._env:Only read-only mode is supported for /vsicurl
ERROR:fiona._env:Only read-only mode is supported for /vsicurl
ERROR:fiona._env:Only read-only mode is supported for /vsicurl
ERROR:fiona._env:Unable to open /vsis3/my_bucket/output/TPGrid.zip/TPGrid.shp or /vsis3/my_bucket/output/TPGrid.zip/TPGrid.SHP.
Traceback (most recent call last):
File "fiona/ogrext.pyx", line 1133, in fiona.ogrext.WritingSession.start
File "fiona/_err.pyx", line 291, in fiona._err.exc_wrap_pointer
fiona._err.CPLE_AppDefinedError: Unable to open /vsis3/my_bucket/output/TPGrid.zip/TPGrid.shp or /vsis3/my_bucket/output/TPGrid.zip/TPGrid.SHP.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/tmp/runscript.py", line 211, in <module>
runpy.run_path(temp_file_path, run_name='__main__')
File "/usr/local/lib/python3.6/runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "/usr/local/lib/python3.6/runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "/usr/local/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/tmp/glue-python-scripts-c8krhm5u/test_to_file_geo.py", line 40, in <module>
File "/glue/lib/installation/geopandas/geodataframe.py", line 1086, in to_file
_to_file(self, filename, driver, schema, index, **kwargs)
File "/glue/lib/installation/geopandas/io/file.py", line 328, in _to_file
filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
File "/glue/lib/installation/fiona/env.py", line 408, in wrapper
return f(*args, **kwargs)
File "/glue/lib/installation/fiona/__init__.py", line 274, in open
**kwargs)
File "/glue/lib/installation/fiona/collection.py", line 165, in __init__
self.session.start(self, **kwargs)
File "fiona/ogrext.pyx", line 1141, in fiona.ogrext.WritingSession.start
fiona.errors.DriverIOError: Unable to open /vsis3/my_bucket/output/TPGrid.zip/TPGrid.shp or /vsis3/my_bucket/output/TPGrid.zip/TPGrid.SHP.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/tmp/runscript.py", line 230, in <module>
raise e_type(e_value).with_traceback(new_stack)
File "/tmp/glue-python-scripts-c8krhm5u/test_to_file_geo.py", line 40, in <module>
File "/glue/lib/installation/geopandas/geodataframe.py", line 1086, in to_file
_to_file(self, filename, driver, schema, index, **kwargs)
File "/glue/lib/installation/geopandas/io/file.py", line 328, in _to_file
filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
File "/glue/lib/installation/fiona/env.py", line 408, in wrapper
return f(*args, **kwargs)
File "/glue/lib/installation/fiona/__init__.py", line 274, in open
**kwargs)
File "/glue/lib/installation/fiona/collection.py", line 165, in __init__
self.session.start(self, **kwargs)
File "fiona/ogrext.pyx", line 1141, in fiona.ogrext.WritingSession.start
fiona.errors.DriverIOError: Unable to open /vsis3/my_bucket/output/TPGrid.zip/TPGrid.shp or /vsis3/my_bucket/output/TPGrid.zip/TPGrid.SHP.
I have tried several variations, such as a '.csv' or '.shp' extension, but none of them worked.
I am using Python 3.6 and the packages below; I hope this information helps:
geopandas-0.9.0
shapely-1.7.1
fiona-1.8.20
GDAL-3.2.3
I have been fighting this problem all day...
Any help would be highly appreciated.
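The repeated 'Only read-only mode is supported for /vsicurl' errors suggest fiona cannot write through its S3 handler in this environment. A common workaround, sketched here with illustrative paths, is to write the shapefile to local disk and upload the resulting zip with the boto3 objects you already have:

import os
import shutil
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    shp_dir = os.path.join(tmp, 'shp')
    os.makedirs(shp_dir)
    # write the .shp/.shx/.dbf/.prj sidecar files to local disk first
    TPGrid.to_file(filename=os.path.join(shp_dir, 'TPGrid.shp'),
                   driver='ESRI Shapefile')
    # bundle the sidecar files into a single zip and upload it
    archive = shutil.make_archive(os.path.join(tmp, 'TPGrid'), 'zip', shp_dir)
    bucket.upload_file(archive, 'output/TPGrid.zip')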

How do I append rows to an excel spreadsheet with openpyxl and then save the excel file?

I'm writing a script that adds new data to an existing excel spreadsheet. Currently, the spreadsheet is 500k+ rows long. I've been using openpyxl to open the spreadsheet, as xlsxwriter doesn't currently have any editing capabilities. However, things go wrong when I use the provided append() method, as explained in this answer to a similar problem, and then try to save.
I'm currently running Python 3.7.3 with openpyxl 2.6.2 on a Windows 7 computer.
from openpyxl import load_workbook, Workbook

records = [...]  # this is just a list of objects
file_name = 'existing_excel_file.xlsx'
excel_workbook = load_workbook(file_name, read_only=False)
worksheet = excel_workbook.active
row_list = []
for record in records:
    row_list.append([
        str(record.weekno),
        str(record.date1),
        str(record.code),
        str(record.customer),
        str(record.date2),
    ])
for row in row_list:
    worksheet.append(row)
excel_workbook.save(file_name)
Obviously, it's supposed to save the file with the appended lines.
append() is working alright, but when I try to execute the save() method, I receive this error:
ValueError: I/O operation on closed file.
EDIT: At the suggestion of @CharlieClark, I grabbed the full traceback. I also noticed a MemoryError that I simply hadn't spotted before (careless, I know), which might be the source of my issue; until this is resolved, I'm researching how to increase the memory available to openpyxl, as I'm sure that's probably the key. Regardless, here's the dump. Warning: it's a big, hairy traceback.
Traceback (most recent call last):
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 836, in _get_writer
yield file.write
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 777, in write
short_empty_elements=short_empty_elements)
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 942, in _serialize_xml
short_empty_elements=short_empty_elements)
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 942, in _serialize_xml
short_empty_elements=short_empty_elements)
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 942, in _serialize_xml
short_empty_elements=short_empty_elements)
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 935, in _serialize_xml
write(" %s=\"%s\"" % (qnames[k], v))
MemoryError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "manage.py", line 21, in <module>
main()
File "manage.py", line 17, in main
execute_from_command_line(sys.argv)
File "C:\Users\davidm\projects\web_admin\venv\lib\site-packages\django\core\management\__init__.py", line 381, in execute_from_command_line
utility.execute()
File "C:\Users\davidm\projects\web_admin\venv\lib\site-packages\django\core\management\__init__.py", line 375, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "C:\Users\davidm\projects\web_admin\venv\lib\site-packages\django\core\management\base.py", line 316, in run_from_argv
self.execute(*args, **cmd_options)
File "C:\Users\davidm\projects\web_admin\venv\lib\site-packages\django\core\management\base.py", line 353, in execute
output = self.handle(*args, **options)
File "C:\Users\davidm\projects\web_admin\web_admin\ezcorp\management\commands\codes.py", line 183, in handle
File "C:\Users\davidm\projects\web_admin\venv\lib\site-packages\openpyxl\workbook\workbook.py", line 397, in save
save_workbook(self, filename)
File "C:\Users\davidm\projects\web_admin\venv\lib\site-packages\openpyxl\writer\excel.py", line 294, in save_workbook
writer.save()
File "C:\Users\davidm\projects\web_admin\venv\lib\site-packages\openpyxl\writer\excel.py", line 276, in save
self.write_data()
File "C:\Users\davidm\projects\rdm_admin\venv\lib\site-packages\openpyxl\writer\excel.py", line 76, in write_data
self._write_worksheets()
File "C:\Users\davidm\projects\rdm_admin\venv\lib\site-packages\openpyxl\writer\excel.py", line 216, in _write_worksheets
self.write_worksheet(ws)
File "C:\Users\davidm\projects\rdm_admin\venv\lib\site-packages\openpyxl\writer\excel.py", line 201, in write_worksheet
writer.write()
File "C:\Users\davidm\projects\rdm_admin\venv\lib\site-packages\openpyxl\worksheet\_writer.py", line 358, in write
self.close()
File "C:\Users\davidm\projects\rdm_admin\venv\lib\site-packages\openpyxl\worksheet\_writer.py", line 366, in close
self.xf.close()
File "C:\Users\davidm\projects\rdm_admin\venv\lib\site-packages\openpyxl\worksheet\_writer.py", line 297, in get_stream
pass
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\contextlib.py", line 119, in __exit__
next(self.gen)
File "C:\Users\davidm\projects\rdm_admin\venv\lib\site-packages\et_xmlfile\xmlfile.py", line 50, in element
self._write_element(el)
File "C:\Users\davidm\projects\rdm_admin\venv\lib\site-packages\et_xmlfile\xmlfile.py", line 77, in _write_element
xml = tostring(element)
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 1136, in tostring
short_empty_elements=short_empty_elements)
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 777, in write
short_empty_elements=short_empty_elements)
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\xml\etree\ElementTree.py", line 836, in _get_writer
yield file.write
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\contextlib.py", line 511, in __exit__
raise exc_details[1]
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\contextlib.py", line 496, in __exit__
if cb(*exc_details):
File "C:\Users\davidm\AppData\Local\Programs\Python\Python37-32\Lib\contextlib.py", line 383, in _exit_wrapper
callback(*args, **kwds)
ValueError: I/O operation on closed file.
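Given the MemoryError and the 32-bit Python visible in the paths, save() is most likely exhausting memory while serialising the 500k+ rows. One memory-flat alternative, a sketch rather than a drop-in fix, is to stream the existing sheet through a read-only workbook into a new write-only workbook and append the new rows at the end:

from openpyxl import load_workbook, Workbook

source = load_workbook('existing_excel_file.xlsx', read_only=True)
target = Workbook(write_only=True)
out_sheet = target.create_sheet()

# copy the existing rows one at a time, keeping memory usage flat
for row in source.active.iter_rows(values_only=True):
    out_sheet.append(row)

# append the new records (row_list built as in the question)
for row in row_list:
    out_sheet.append(row)

target.save('existing_excel_file_new.xlsx')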

PyTorch Getting Started example not working

I followed this tutorial in the Getting Started section on the PyTorch website: "Deep Learning with PyTorch: A 60 Minute Blitz". I downloaded the code for "Training a Classifier" at the bottom of the page, ran it, and it's not working for me. I'm using the CPU version of PyTorch, if that makes a difference. I'm new to Python and am basically learning it for PyTorch. Here's the error message (Control + K isn't working for me; I think the editing interface is different for the first few posts and Stack Overflow needs to fix it, or it could just be my browser):
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 114, in _main
prepare(preparation_data)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 225, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
run_name="__mp_main__")
File "C:\ProgramData\Anaconda3\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "C:\ProgramData\Anaconda3\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "C:\ProgramData\Anaconda3\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\Anonymous\PycharmProjects\pytorchHelloWorld\train_network.py", line 100, in <module>
dataiter = iter(trainloader)
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 819, in __iter__
return _DataLoaderIter(self)
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 560, in __init__
w.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 33, in __init__
prep_data = spawn.get_preparation_data(process_obj._name)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 143, in get_preparation_data
_check_not_importing_main()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 136, in _check_not_importing_main
is not going to be frozen to produce an executable.''')
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
Traceback (most recent call last):
File "C:/Users/Anonymous/PycharmProjects/pytorchHelloWorld/train_network.py", line 100, in <module>
dataiter = iter(trainloader)
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 819, in __iter__
return _DataLoaderIter(self)
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 560, in __init__
w.start()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
reduction.dump(process_obj, to_child)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
BrokenPipeError: [Errno 32] Broken pipe
The error is likely due to multiprocessing in DataLoader on Windows, since the tutorial uses num_workers=2. The Python 3 documentation shares some guidelines on this:
Make sure that the main module can be safely imported by a new Python interpreter without causing unintended side effects (such as starting a new process).
You can either set num_workers=0 or wrap your data-loading code within if __name__ == '__main__':
# Safe DataLoader multiprocessing with Windows
if __name__ == '__main__':
    # Code to load the data with num_workers > 1
Check this reply on PyTorch forum for more details and this issue on GitHub.
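For reference, a minimal sketch of the guarded pattern applied to the tutorial's loader (dataset and parameters as in the tutorial; the __main__ guard is the only change):

import torch
import torchvision
import torchvision.transforms as transforms

def main():
    transform = transforms.ToTensor()
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                              shuffle=True, num_workers=2)
    # worker processes are spawned here, safely under the guard
    dataiter = iter(trainloader)
    images, labels = next(dataiter)

if __name__ == '__main__':
    main()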

Using filepaths as global variables in Python

I have a file global_vars.py that contains file paths saved as variables:
from pandas import Timestamp
final_vol_path = 'datasets/final_vols.csv'
final_price_path = 'datasets/final_prices.csv'
final_start_date = Timestamp('2017-01-01')
with other variables written in a similar fashion. However, the functions that I'm using to read in the data throw a FileNotFoundError when attempting to do the following in file1.py:
import scripts.global_vars as gv
read_data(gv.final_vol_path, gv.final_price_path) # throws FileNotFoundError
read_data('datasets/final_vols.csv', 'datasets/final_prices.csv') # this passes
Additionally, I've checked the file paths, and have gotten the following:
gv.final_vol_path == 'datasets/final_vols.csv' # returns True
gv.final_price_path == 'datasets/final_prices.csv' # returns True
Moreover, the pandas Timestamp object is processed without any problems.
Is there any explanation for why the FileNotFoundError is being thrown when attempting to access the file path as a variable from global_vars.py, but is not thrown when the actual string is passed in?
EDIT: The overall directory structure is as follows:
working_dir
├── file1.py
├── scripts
│   └── global_vars.py
└── datasets
    ├── final_vols.csv
    └── final_prices.csv
EDIT 2: I added a try/except block to ensure the rest of the function doesn't break; I'm not sure whether that has affected the traceback, but here's what I get:
Traceback (most recent call last):
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\runpy.py", line
184, in _run_module_as_main
"__main__", mod_spec)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\Ananth\Anaconda3\envs\analytics-cpu\Scripts\nose2.exe\__main__.py", line 9, in <module>
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\main.py", line 306, in discover
return main(*args, **kwargs)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\main.py", line 100, in __init__
super(PluggableTestProgram, self).__init__(**kw)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\unittest\main.py", line 93, in __init__
self.parseArgs(argv)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\main.py", line 133, in parseArgs
self.createTests()
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\main.py", line 258, in createTests
self.testNames, self.module)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\loader.py", line 69, in loadTestsFromNames
for name in event.names]
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\loader.py", line 69, in <listcomp>
for name in event.names]
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\loader.py", line 84, in loadTestsFromName
result = self.session.hooks.loadTestsFromName(event)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\events.py", line 224, in __call__
result = getattr(plugin, self.method)(event)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\plugins\loader\testcases.py", line 56, in loadTestsFromName
result = util.test_from_name(name, module)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\util.py", line 106, in test_from_name
parent, obj = object_from_name(name, module)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\nose2\util.py", line 117, in object_from_name
module = __import__('.'.join(parts_copy))
File "C:\Users\Ananth\Desktop\Modules\PortfolioVARModule\tests\test_simulation.py", line 24, in <module>
gv.test_start_date)
File "C:\Users\Ananth\Desktop\Modules\PortfolioVARModule\scripts\prep_data.py", line 119, in read_data
priceDF = pd.read_csv(pricepath).dropna()
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\pandas\io\parsers.py", line 646, in parser_f
return _read(filepath_or_buffer, kwds)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\pandas\io\parsers.py", line 389, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\pandas\io\parsers.py", line 730, in __init__
self._make_engine(self.engine)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\pandas\io\parsers.py", line 923, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "c:\users\ananth\anaconda3\envs\analytics-cpu\lib\site-packages\pandas\io\parsers.py", line 1390, in __init__
self._reader = _parser.TextReader(src, **kwds)
File "pandas\parser.pyx", line 373, in pandas.parser.TextReader.__cinit__ (pandas\parser.c:4184)
File "pandas\parser.pyx", line 667, in pandas.parser.TextReader._setup_parser_source (pandas\parser.c:8449)
FileNotFoundError: File b'datasets/corn_price.csv' does not exist
The problem is the addition of the letter b in front of your file's path. You get the b because the path has been encoded to UTF-8 bytes.
Try decoding it back to a string:
read_data(str(gv.final_vol_path, 'utf-8'), str(gv.final_price_path, 'utf-8'))
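If you are unsure whether the globals arrive as str or bytes, a defensive variant of the same idea (illustrative, not from the thread):

vol_path = gv.final_vol_path
price_path = gv.final_price_path

# decode only if the values really are bytes
if isinstance(vol_path, bytes):
    vol_path = vol_path.decode('utf-8')
if isinstance(price_path, bytes):
    price_path = price_path.decode('utf-8')

read_data(vol_path, price_path)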
