I am trying to read in data using the Apache Tika library to parse PDF files. I installed it with `pip install tika` under Python 3.
Code:
from tika import parser
parsedPDF = parser.from_file("test.pdf",serverEndpoint='http://localhost:9998')
or
from tika import parser
parsedPDF = parser.from_file("test.pdf")
Error:
Traceback (most recent call last):
File "tikaparsing-test.py", line 2, in <module>
parsedPDF = parser.from_file("test.pdf",serverEndpoint='http://localhost:9998')
File "C:\ProgramData\Anaconda3\lib\site-packages\tika\parser.py", line 36, in from_file
jsonOutput = parse1('all', filename, serverEndpoint, headers=headers)
File "C:\ProgramData\Anaconda3\lib\site-packages\tika\tika.py", line 316, in parse1
headers, verbose, tikaServerJar, rawResponse=rawResponse)
File "C:\ProgramData\Anaconda3\lib\site-packages\tika\tika.py", line 510, in callServer
serverEndpoint = checkTikaServer(scheme, serverHost, port, tikaServerJar, classpath)
File "C:\ProgramData\Anaconda3\lib\site-packages\tika\tika.py", line 565, in checkTikaServer
startServer(jarPath, serverHost, port, classpath)
File "C:\ProgramData\Anaconda3\lib\site-packages\tika\tika.py", line 609, in startServer
cmd = Popen(cmd , stdout= logFile, stderr = STDOUT, shell =True)
File "C:\ProgramData\Anaconda3\lib\subprocess.py", line 709, in __init__
restore_signals, start_new_session)
File "C:\ProgramData\Anaconda3\lib\subprocess.py", line 997, in _execute_child
startupinfo)
PermissionError: [WinError 5] Access is denied
Related
Traceback (most recent call last):
File "C:\Users\SAMSUNG\Desktop\Whisper\test.py", line 4, in
audio = whisper.load_audio("audio.mp3")
File "C:\Users\SAMSUNG\AppData\Local\Programs\Python\Python37\lib\site-packages\whisper\audio.py", line 44, in load_audio
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
File "C:\Users\SAMSUNG\AppData\Local\Programs\Python\Python37\lib\site-packages\ffmpeg_run.py", line 320, in run
overwrite_output=overwrite_output,
File "C:\Users\SAMSUNG\AppData\Local\Programs\Python\Python37\lib\site-packages\ffmpeg_run.py", line 285, in run_async
args, stdin=stdin_stream, stdout=stdout_stream, stderr=stderr_stream
File "C:\Users\SAMSUNG\AppData\Local\Programs\Python\Python37\lib\subprocess.py", line 800, in init
restore_signals, start_new_session)
File "C:\Users\SAMSUNG\AppData\Local\Programs\Python\Python37\lib\subprocess.py", line 1207, in _execute_child
startupinfo)
FileNotFoundError: [WinError 2] The system cannot find the file specified
The EPUB is generated successfully, but when I try to open it with readers like Calibre or Sigil, they throw errors saying that certain files are missing.
Here's my code to generate the epub file:
book = epub.EpubBook()
book.set_title(novelName)
book.set_language("en")
book.set_cover('temp.jpg', content=open('temp.jpg','rb').read())
book.set_identifier("test")
for i in authorNames:
book.add_author(i)
for i in range(1):
driver.get(chapterLinks[i])
try:
content=driver.find_element_by_id('chr-content').get_attribute("innerHTML")
time.sleep(5)
except Exception as e:
driver.close()
driver = webdriver.Chrome(ChromeDriverManager().install(),options=options)
driver.get(chapterLinks[i])
content=driver.find_element_by_id('chr-content').get_attribute("innerHTML")
time.sleep(5)
soup = BeautifulSoup(content)
ads=soup.find("div", class_="ads-holder")
if(ads!=None):
ads.decompose()
print(chapterNames[i], chapterLinks[i])
chapterName=chapterNames[i].replace("-","")
c=epub.EpubHtml(title=chapterName,
file_name='{}.xhtml'.format(chapterName),
lang='en')
c.set_content(str(soup).encode('utf-8'))
book.add_item(c)
chapterList.append(c)
book.toc = chapterList
book.spine = chapterList
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
epub.write_epub('test.epub', book)
and here are the errors:
Calibre :
calibre, version 5.20.0
ERROR: Loading book failed: Failed to open the book at C:\Users\xxxxx\Documents\Visual Studio 2019\PersonalProjects\Novel Grabber\test.epub. Click "Show details" for more info.
Failed to convert book: C:\Users\xxxxx\Documents\Visual Studio 2019\PersonalProjects\Novel Grabber\test.epub with error:
InputFormatPlugin: EPUB Input running
on C:\Users\xxxxx\Documents\Visual Studio 2019\PersonalProjects\Novel Grabber\test.epub
Failed to run pipe worker with command: from calibre.srv.render_book import viewer_main; viewer_main()
Traceback (most recent call last):
File "runpy.py", line 194, in _run_module_as_main
File "runpy.py", line 87, in _run_code
File "site.py", line 82, in <module>
File "site.py", line 77, in main
File "site.py", line 49, in run_entry_point
File "calibre\utils\ipc\worker.py", line 197, in main
File "<string>", line 1, in <module>
File "calibre\srv\render_book.py", line 824, in viewer_main
File "calibre\srv\render_book.py", line 815, in render_for_viewer
File "calibre\srv\render_book.py", line 793, in render
File "calibre\srv\render_book.py", line 601, in process_exploded_book
File "calibre\srv\render_book.py", line 604, in <setcomp>
File "calibre\ebooks\oeb\polish\container.py", line 561, in has_name_and_is_not_empty
File "genericpath.py", line 50, in getsize
FileNotFoundError: [WinError 2] The system cannot find the file specified: 'C:\\Users\\xxxxxx\\AppData\\Local\\calibre-cache\\ev2\\t\\c0-vdo66nim\\EPUB\\Chapter 2 '
Sigil:
Files exist in epub that are not listed in manifest, they will be ignored
Does anybody know what could be the cause for this?
I have successfully read a zipped shapefile from my S3 bucket with geopandas, but I get an error when I try to write the same GeoDataFrame back to the same S3 bucket as a shapefile.
The code below is how I read the zip file, and it works nicely:
## session for connecting to S3
session = boto3.session.Session(aws_access_key_id='MY-KEY-ID',
aws_secret_access_key='MY-KEY')
s3 = session.resource('s3')
bucket = s3.Bucket('my_bucket')
## read shapefile
TPG = bucket.Object(key='/shapefiles/grid.zip')
TPGrid = geopandas.read_file(TPG.get()['Body'])
But when I tried to output the same geodataframe like this:
TPGrid.to_file(filename='s3://my_bucket/output/TPGrid.zip', driver='ESRI Shapefile')
I get the following error:
ERROR:fiona._env:Only read-only mode is supported for /vsicurl
ERROR:fiona._env:Only read-only mode is supported for /vsicurl
ERROR:fiona._env:Only read-only mode is supported for /vsicurl
ERROR:fiona._env:Unable to open /vsis3/my_bucket/output/TPGrid.zip/TPGrid.shp or /vsis3/my_bucket/output/TPGrid.zip/TPGrid.SHP.
Traceback (most recent call last):
File "fiona/ogrext.pyx", line 1133, in fiona.ogrext.WritingSession.start
File "fiona/_err.pyx", line 291, in fiona._err.exc_wrap_pointer
fiona._err.CPLE_AppDefinedError: Unable to open /vsis3/my_bucket/output/TPGrid.zip/TPGrid.shp or /vsis3/my_bucket/output/TPGrid.zip/TPGrid.SHP.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/tmp/runscript.py", line 211, in <module>
runpy.run_path(temp_file_path, run_name='__main__')
File "/usr/local/lib/python3.6/runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "/usr/local/lib/python3.6/runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "/usr/local/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/tmp/glue-python-scripts-c8krhm5u/test_to_file_geo.py", line 40, in <module>
File "/glue/lib/installation/geopandas/geodataframe.py", line 1086, in to_file
_to_file(self, filename, driver, schema, index, **kwargs)
File "/glue/lib/installation/geopandas/io/file.py", line 328, in _to_file
filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
File "/glue/lib/installation/fiona/env.py", line 408, in wrapper
return f(*args, **kwargs)
File "/glue/lib/installation/fiona/__init__.py", line 274, in open
**kwargs)
File "/glue/lib/installation/fiona/collection.py", line 165, in __init__
self.session.start(self, **kwargs)
File "fiona/ogrext.pyx", line 1141, in fiona.ogrext.WritingSession.start
fiona.errors.DriverIOError: Unable to open /vsis3/my_bucket/output/TPGrid.zip/TPGrid.shp or /vsis3/my_bucket/output/TPGrid.zip/TPGrid.SHP.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/tmp/runscript.py", line 230, in <module>
raise e_type(e_value).with_traceback(new_stack)
File "/tmp/glue-python-scripts-c8krhm5u/test_to_file_geo.py", line 40, in <module>
File "/glue/lib/installation/geopandas/geodataframe.py", line 1086, in to_file
_to_file(self, filename, driver, schema, index, **kwargs)
File "/glue/lib/installation/geopandas/io/file.py", line 328, in _to_file
filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
File "/glue/lib/installation/fiona/env.py", line 408, in wrapper
return f(*args, **kwargs)
File "/glue/lib/installation/fiona/__init__.py", line 274, in open
**kwargs)
File "/glue/lib/installation/fiona/collection.py", line 165, in __init__
self.session.start(self, **kwargs)
File "fiona/ogrext.pyx", line 1141, in fiona.ogrext.WritingSession.start
fiona.errors.DriverIOError: Unable to open /vsis3/my_bucket/output/TPGrid.zip/TPGrid.shp or /vsis3/my_bucket/output/TPGrid.zip/TPGrid.SHP.
I have tried several approaches, such as using '.csv' or '.shp', but none of them worked.
I am using Python 3.6 and the packages below; I hope this information helps:
geopandas-0.9.0
shapely-1.7.1
fiona-1.8.20
GDAL-3.2.3
I have been fighting with this problem all day.
Any help will be highly appreciated.
I am trying to host a Django project on EC2 using Apache2; the project uses MongoDB Atlas as its database.
When I run it directly on the EC2 instance after opening a port such as 8000, it works properly, but under Apache it gives an error.
Below is the TLSFeature error I am facing:
mod_wsgi (pid=67994, process='tracky', application='ip-IP_ADDRESS|'): Loading Python script file '/home/tracky/tracky-api/tracky/wsgi.py'.
mongodb+srv://username:password#host/dbname?retryWrites=true&w=majority
mod_wsgi (pid=67994): Exception occurred processing WSGI script '/home/tracky/tracky-api/tracky/wsgi.py'.
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/pool.py", line 1278, in _get_socket
sock_info = self.sockets.popleft()
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/django/core/handlers/exception.py", line 47, in inner
response = get_response(request)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site- File "/home/tracky/tracky-api/trackyapi/urls.py", line 2, in <module>
from .views import Tracky
File "/home/tracky/tracky-api/trackyapi/views.py", line 16, in <module>
from .serializers import GoalsSerializer, GoalSerializer
File "/home/tracky/tracky-api/trackyapi/serializers.py", line 9, in <module>
class GoalSerializer(DocumentSerializer):
File "/home/tracky/tracky-api/trackyapi/serializers.py", line 11, in GoalSerializer
uid = ReferenceField(User, write_only=True)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/rest_framework_mongoengine/fields.py", line 217, in __init__
self.queryset = model.objects
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/mongoengine/queryset/manager.py", line 38, in __get__
queryset = queryset_class(owner, owner._get_collection())
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/mongoengine/document.py", line 215, in _get_collection
db = cls._get_db()
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/mongoengine/document.py", line 193, in _get_db
return get_db(cls._meta.get("db_alias", DEFAULT_CONNECTION_NAME))
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/mongoengine/connection.py", line 363, in get_db
db.authenticate(
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/database.py", line 1492, in authenticate
self.client._cache_credentials(
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/mongo_client.py", line 780, in _cache_credentials
with server.get_socket(all_credentials) as sock_info:
File "/usr/lib/python3.8/contextlib.py", line 113, in __enter__
return next(self.gen)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/pool.py", line 1231, in get_socket
sock_info = self._get_socket(all_credentials)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/pool.py", line 1281, in _get_socket
sock_info = self.connect(all_credentials)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/pool.py", line 1180, in connect
sock = _configured_socket(self.address, self.opts)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/pool.py", line 1002, in _configured_socket
sock = ssl_context.wrap_socket(sock, server_hostname=host)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/pyopenssl_context.py", line 313, in wrap_socket
ssl_conn.do_handshake()
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/pyopenssl_context.py", line 116, in do_handshake
return self._call(super(_sslConn, self).do_handshake, *args, **kwargs)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/pyopenssl_context.py", line 107, in _call
return call(*args, **kwargs)
File "/usr/lib/python3/dist-packages/OpenSSL/SSL.py", line 1915, in do_handshake
self._raise_ssl_error(self._ssl, result)
File "/usr/lib/python3/dist-packages/OpenSSL/SSL.py", line 1618, in _raise_ssl_error
self._context._ocsp_helper.raise_if_problem()
File "/usr/lib/python3/dist-packages/OpenSSL/SSL.py", line 294, in raise_if_problem
raise self._problems.pop(0)
File "/usr/lib/python3/dist-packages/OpenSSL/SSL.py", line 590, in wrapper
valid = callback(conn, ocsp_data, data)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/ocsp_support.py", line 292, in _ocsp_callback
ext = _get_extension(cert, _TLSFeature)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/pymongo/ocsp_support.py", line 119, in _get_extension
return cert.extensions.get_extension_for_class(klass)
File "/home/ubuntu/anaconda3/envs/tracky/lib/python3.6/site-packages/cryptography/x509/extensions.py", line 134, in get_extension_for_class
raise ExtensionNotFound(
cryptography.x509.extensions.ExtensionNotFound: No <class 'cryptography.x509.extensions.TLSFeature'> extension was found
Any suggestion will be helpful.
Thanks.
I'm trying to run a video processing code on NVIDIA TX2 using moviepy. The code is:
clip = VideoFileClip(video_file)
video_clip = clip.fl_image(process_vid)
video_clip.write_videofile(output_vid2)
I get the error in the first line. The full error is:
Traceback (most recent call last):
File "img_test.py", line 117, in <module>
clip = VideoFileClip(video_file)
File "/home/nvidia/.local/lib/python3.5/site-packages/moviepy/video/io/VideoFileClip.py", line 91, in __init__
fps_source=fps_source)
File "/home/nvidia/.local/lib/python3.5/site-packages/moviepy/video/io/ffmpeg_reader.py", line 33, in __init__
fps_source)
File "/home/nvidia/.local/lib/python3.5/site-packages/moviepy/video/io/ffmpeg_reader.py", line 256, in ffmpeg_parse_infos
proc = sp.Popen(cmd, **popen_params)
File "/usr/lib/python3.5/subprocess.py", line 947, in __init__
restore_signals, start_new_session)
File "/usr/lib/python3.5/subprocess.py", line 1551, in _execute_child
raise child_exception_type(errno_num, err_msg)
OSError: [Errno 8] Exec format error
I even tried following this reference, but nothing seems to work.
Any suggestions?