When I call the Cloud Video Intelligence API to detect subtitles in a local video file, it always returns error 400 or 504, but using a video stored in GCS works fine. I have tried adjusting the timeout in the Cloud Video Intelligence config, but it still shows error 400 with "invalid argument".
This is my Python code for detecting video subtitles:
"""This application demonstrates detection subtitles in video using the Google Cloud API.
Usage Examples:
use video in google cloud storge:
python analyze.py text_gcs gs://"video path"
use video in computer:
python analyze.py text_file video.mp4
"""
import argparse
import io
from google.cloud import videointelligence
from google.cloud.videointelligence import enums
def video_detect_text_gcs(input_uri):
    # [START video_detect_text_gcs]
    """Detect text in a video stored on GCS."""
    from google.cloud import videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.TEXT_DETECTION]
    config = videointelligence.types.TextDetectionConfig(language_hints=["zh-TW", "en-US"])
    video_context = videointelligence.types.VideoContext(text_detection_config=config)

    operation = video_client.annotate_video(
        input_uri=input_uri, features=features, video_context=video_context
    )
    print("\nSubtitle detecting......")

    result = operation.result(timeout=300)
    # The first result is retrieved because a single video was processed.
    annotation_result = result.annotation_results[0]

    subtitle_data = []
    for text_annotation in annotation_result.text_annotations:
        text_segment = text_annotation.segments[0]
        start_time = text_segment.segment.start_time_offset
        frame = text_segment.frames[0]
        vertex = frame.rotated_bounding_box.vertices[0]
        if text_segment.confidence > 0.95 and vertex.y > 0.7:
            lists = [text_annotation.text, start_time.seconds + start_time.nanos * 1e-9, vertex.y]
            subtitle_data = subtitle_data + [lists]

    length = len(subtitle_data)
    subtitle_sort = sorted(subtitle_data, key=lambda x: (x[1], x[2]))
    i = 0
    subtitle = []
    while i < length:
        subtitle = subtitle + [subtitle_sort[i][0]]
        i = i + 1

    with open("subtitle.txt", mode="w", encoding="utf-8") as file:
        for x in subtitle:
            file.write(x + "\n")
def video_detect_text(path):
    # [START video_detect_text]
    """Detect text in a local video."""
    from google.cloud import videointelligence

    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.TEXT_DETECTION]
    video_context = videointelligence.types.VideoContext()

    with io.open(path, "rb") as file:
        input_content = file.read()

    operation = video_client.annotate_video(
        input_content=input_content,  # the bytes of the video file
        features=features,
        video_context=video_context,
    )
    print("\nSubtitle detecting......")

    result = operation.result(timeout=300)
    # The first result is retrieved because a single video was processed.
    annotation_result = result.annotation_results[0]

    subtitle_data = []
    for text_annotation in annotation_result.text_annotations:
        text_segment = text_annotation.segments[0]
        start_time = text_segment.segment.start_time_offset
        frame = text_segment.frames[0]
        vertex = frame.rotated_bounding_box.vertices[0]
        if text_segment.confidence > 0.95 and vertex.y > 0.7:
            lists = [text_annotation.text, start_time.seconds + start_time.nanos * 1e-9, vertex.y]
            subtitle_data = subtitle_data + [lists]

    length = len(subtitle_data)
    subtitle_sort = sorted(subtitle_data, key=lambda x: (x[1], x[2]))
    i = 0
    subtitle = []
    while i < length:
        subtitle = subtitle + [subtitle_sort[i][0]]
        i = i + 1

    with open("subtitle.txt", mode="w", encoding="utf-8") as file:
        for x in subtitle:
            file.write(x + "\n")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
subparsers = parser.add_subparsers(dest="command")
detect_text_parser = subparsers.add_parser(
"text_gcs", help=video_detect_text_gcs.__doc__
)
detect_text_parser.add_argument("path")
detect_text_file_parser = subparsers.add_parser(
"text_file", help=video_detect_text.__doc__
)
detect_text_file_parser.add_argument("path")
args = parser.parse_args()
if args.command == "text_gcs":
video_detect_text_gcs(args.path)
if args.command == "text_file":
video_detect_text(args.path)
This is the error report:
Ghuang#/Users/Ghuang/Documents/GitHub/Video-subtitles-detection$ python3 analyze.py text_file video.mp4
Traceback (most recent call last):
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/grpc_helpers.py", line 57, in error_remapped_callable
return callable_(*args, **kwargs)
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/grpc/_channel.py", line 826, in __call__
return _end_unary_response_blocking(state, call, False, None)
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/grpc/_channel.py", line 729, in _end_unary_response_blocking
raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.DEADLINE_EXCEEDED
details = "Deadline Exceeded"
debug_error_string = "{"created":"#1587691109.677447000","description":"Error received from peer ipv4:172.217.24.10:443","file":"src/core/lib/surface/call.cc","file_line":1056,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "analyze.py", line 144, in <module>
video_detect_text(args.path)
File "analyze.py", line 90, in video_detect_text
video_context=video_context
File "/Library/Python/3.7/site-packages/google/cloud/videointelligence_v1/gapic/video_intelligence_service_client.py", line 303, in annotate_video
request, retry=retry, timeout=timeout, metadata=metadata
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/gapic_v1/method.py", line 143, in __call__
return wrapped_func(*args, **kwargs)
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/retry.py", line 286, in retry_wrapped_func
on_error=on_error,
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/retry.py", line 184, in retry_target
return target()
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/timeout.py", line 214, in func_with_timeout
return func(*args, **kwargs)
File "/Users/Ghuang/Library/Python/3.7/lib/python/site-packages/google/api_core/grpc_helpers.py", line 59, in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc), exc)
File "<string>", line 3, in raise_from
google.api_core.exceptions.DeadlineExceeded: 504 Deadline Exceeded
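For reference, a minimal sketch of the two deadlines involved when annotating a large local file: the per-RPC timeout that annotate_video accepts and the wait passed to operation.result. The values are illustrative assumptions, not verified settings, and the snippet is meant to drop into video_detect_text:

# Hedged sketch: allow more time both for submitting the request and for
# waiting on the long-running operation. The numbers are illustrative only.
operation = video_client.annotate_video(
    input_content=input_content,   # bytes of the local video
    features=features,
    video_context=video_context,
    timeout=600,                   # per-RPC deadline for the annotate_video call
)
result = operation.result(timeout=1800)  # wait up to 30 minutes for processing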
I have been trying to show a score converted from a krn file:
import music21 as m
import os

test_data = "D:/Programming/DATA - SCIENCE/deep learning/music generation/data/test"

def load_krn_files(data_path):
    # go through all the files under data_path
    songs = []
    for path, subdirs, files in os.walk(data_path):
        for file in files:
            if file[-3:] == "krn":
                song = m.converter.parse(os.path.join(path, file))
                songs.append(song)
    return songs

def preproccessing(data_path):
    pass

# 1) load the kern files and parse them
if __name__ == "__main__":
    songs = load_krn_files(test_data)
    print(f"loaded {len(songs)} songs.")
    song = songs[0]
    song.show()
but the show() method returns the following error:
loaded 12 songs.
Traceback (most recent call last):
File "d:/Programming/DATA - SCIENCE/deep learning/music generation/scripts/preprocess.py", line 25, in
song.show()
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\site-packages\music21\stream\base.py", line 334, in show
return super().show(fmt=fmt, app=app, **keywords)
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\site-packages\music21\base.py", line 2788, in show
return formatWriter.show(self,
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\site-packages\music21\converter\subConverters.py", line 1114, in show
self.launch(returnedFilePath, fmt=fmt, app=app)
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\site-packages\music21\converter\subConverters.py", line 197, in launch
subprocess.run(cmd, check=False, shell=shell)
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\site-packages\run_init_.py", line 145, in new
process = cls.create_process(command, stdin, cwd=cwd, env=env, shell=shell)
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\site-packages\run_init_.py", line 121, in create_process
shlex.split(command),
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\shlex.py", line 311, in split
return list(lex)
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\shlex.py", line 300, in next
token = self.get_token()
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\shlex.py", line 109, in get_token
raw = self.read_token()
File "C:\Users\ae504\AppData\Local\Programs\Python\Python38\lib\shlex.py", line 140, in read_token
nextchar = self.instream.read(1)
AttributeError: 'tuple' object has no attribute 'read'
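As a hedged workaround sketch while the external viewer launch is failing, the parsed score can be written to a file with music21's write() instead of show(); the output path here is an illustrative assumption:

# Hedged sketch: export the first parsed score instead of launching a viewer.
# "first_song.musicxml" is an illustrative path, not from the original code.
song = songs[0]
song.write("musicxml", fp="first_song.musicxml")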
I am trying to do OCR (optical character recognition) on multiple images in a folder, using Python's multiprocessing module to process them in parallel by spawning new processes.
When I run the code directly with the python command it works fine, but when I import the ImageRecogniser class and call process_file, it keeps spawning new processes from the calling module.
I tried freeze_support, but that didn't help.
Right now, to make it work, I am falling back to a subprocess call, which is not right at all.
ImageRecogniser code:
from tesserocr import PyTessBaseAPI
import os
from multiprocessing import get_context, Value
import sys
import time
from datetime import datetime
import json
from PIL import Image
from itertools import islice

queue_counter = Value('i', 0)

class ImageRecogniser:
    def __init__(self, input_folder_name):
        self.input_folder_name = input_folder_name
        self.total_files = 0

    def get_images(self, extension=(".png",)):
        files_in_dir = os.listdir(self.input_folder_name)
        image_list = [os.path.join(self.input_folder_name, file_) for file_ in files_in_dir
                      if os.path.splitext(file_)[-1] in extension]
        self.total_files = len(image_list)
        return image_list
    def process_images(self, images):
        string_list = []
        api = PyTessBaseAPI()
        for count, image in enumerate(images):
            img = Image.open(image)
            api.SetImage(img)
            output_text = api.GetUTF8Text()
            temp_filename = os.path.splitext(os.path.split(image)[-1])[0]
            page_number = temp_filename.split("_")[-1]
            print("Processed page : {}".format(page_number))
            output_dict = {"page_number": page_number, "output": output_text}
            string_list.append(output_dict)
        api.End()
        return string_list
    def process_file(self, parallel=False):
        print("Getting images from pdf : {}".format(datetime.now()))
        image_list = self.get_images()
        total_no_of_pages = len(image_list)
        print("Initiating extraction : {}".format(datetime.now()))
        begin_time = datetime.now()
        if parallel:
            available_cpus = len(os.sched_getaffinity(0))
            pool_workers = available_cpus // 1
            # Split the pages into one chunk per worker.
            if total_no_of_pages > pool_workers:
                size_of_queue = total_no_of_pages // pool_workers
                split_queue = [size_of_queue] * pool_workers
                sum_of_split_queue = sum(split_queue)
                if sum_of_split_queue != total_no_of_pages:
                    pages_left = total_no_of_pages - sum_of_split_queue
                    for i in range(pages_left):
                        split_queue[i] = split_queue[i] + 1
            else:
                split_queue = [0] * pool_workers
                for i in range(total_no_of_pages):
                    split_queue[i] = 1
            # page_ids_list_iterator = iter(page_ids_list)
            print('Split queue : {}'.format(split_queue))
            to_be_processed = []
            start = 0  # start at 0 so the first image is not skipped
            for chunksize in split_queue:
                end = chunksize + start
                to_be_processed.append(image_list[start:end])
                start = end
            with get_context('spawn').Pool(processes=pool_workers) as pool:
                result = pool.map_async(self.process_images, to_be_processed)
                output = result.get()
            output = [actual_value for queue_list in output for actual_value in queue_list]
            # json_output = json.dumps(output)
        else:
            output = self.process_images(image_list)
        json_file_name = self.input_folder_name + ".json"
        with open(json_file_name, "w") as fp:
            json.dump(output, fp)
if __name__ == '__main__':
    source = sys.argv[1]
    t = ImageRecogniser(source)
    try:
        current = os.environ["OMP_THREAD_LIMIT"]
    except:
        current = None
    os.environ["OMP_THREAD_LIMIT"] = "1"
    t.process_file(parallel=True)
    if current:
        os.environ["OMP_THREAD_LIMIT"] = str(current)
worker.py
import os
import logging
import logging.config  # dictConfig lives in logging.config and needs an explicit import
import yaml
from pdf_converter.pdf_to_image import PDFtoImage
from pdf_converter.settings import DATA_FOLDER_PATH, LOGGER_FILE_PATH
from recogniser.image_recogniser import ImageRecogniser
import subprocess

with open(LOGGER_FILE_PATH, 'r') as f:
    config = yaml.safe_load(f.read())
logging.config.dictConfig(config)
logger = logging.getLogger(__name__)
logger.info("STARTING WORKER")
file_name = "4506-T.pdf"
file_path = os.path.join(DATA_FOLDER_PATH, file_name)
logger.info("STARTING PDFTOIMAGE CONVERSION")
pdf_image = PDFtoImage(file_path)
files = pdf_image.run()
logger.info("CREATED {} IMAGE FILES FROM {}".format(len(files),file_name))
image_files_path = os.path.join(DATA_FOLDER_PATH, os.path.splitext(file_name)[0])
# subprocess.call(['python', 'recogniser/image_recogniser_cmd.py', image_files_path])
img_rec = ImageRecogniser(image_files_path)
img_rec.process_file(parallel=True)
Error:
2020-07-27 19:57:49,244 - __mp_main__ - INFO - CREATED 53 IMAGE FILES FROM 4506-T.pdf
Getting images from pdf : /home/shashank-mq/project/doOCR/data/4506-T.pdf
Initiating extraction : 2020-07-27 19:57:49.245889
Split queue : [7, 7, 7, 7, 7, 6, 6, 6]
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/lib/python3.6/multiprocessing/spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "/usr/lib/python3.6/multiprocessing/spawn.py", line 114, in _main
prepare(preparation_data)
File "/usr/lib/python3.6/multiprocessing/spawn.py", line 225, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "/usr/lib/python3.6/multiprocessing/spawn.py", line 277, in _fixup_main_from_path
run_name="__mp_main__")
File "/usr/lib/python3.6/runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "/usr/lib/python3.6/runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/shashank-mq/project/doOCR/worker.py", line 27, in <module>
img_rec.process_file(parallel=True)
File "/home/shashank-mq/project/doOCR/recogniser/image_recogniser.py", line 78, in process_file
with get_context('spawn').Pool(processes=pool_workers) as pool:
File "/usr/lib/python3.6/multiprocessing/context.py", line 119, in Pool
context=self.get_context())
File "/usr/lib/python3.6/multiprocessing/pool.py", line 174, in __init__
self._repopulate_pool()
File "/usr/lib/python3.6/multiprocessing/pool.py", line 239, in _repopulate_pool
w.start()
File "/usr/lib/python3.6/multiprocessing/process.py", line 105, in start
self._popen = self._Popen(self)
File "/usr/lib/python3.6/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/usr/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/usr/lib/python3.6/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/usr/lib/python3.6/multiprocessing/popen_spawn_posix.py", line 42, in _launch
prep_data = spawn.get_preparation_data(process_obj._name)
File "/usr/lib/python3.6/multiprocessing/spawn.py", line 143, in get_preparation_data
_check_not_importing_main()
File "/usr/lib/python3.6/multiprocessing/spawn.py", line 136, in _check_not_importing_main
is not going to be frozen to produce an executable.''')
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
Please suggest what I am missing and how to do this properly.
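A minimal sketch of the guard the RuntimeError asks for, applied to worker.py; the import and path are taken from the snippets above, but the exact layout is an assumption:

# worker.py -- hedged sketch: keep the module-level work behind a __name__ guard
# so children started with the 'spawn' context can re-import this module without
# re-running process_file().
import os
from recogniser.image_recogniser import ImageRecogniser  # assumed project layout

def main():
    image_files_path = os.path.join("data", "4506-T")  # illustrative path
    img_rec = ImageRecogniser(image_files_path)
    img_rec.process_file(parallel=True)

if __name__ == "__main__":
    main()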
I have a Tornado API for a Keras model. Is it possible to test concurrency using Python's multiprocessing module? I tried the code below, but it throws an error.
from multiprocessing import Pool
import requests, json

url = 'http://localhost:8888/index/predict'
payload = {"colA": "some1", "colB": "some2", ...}  # remaining columns omitted
pl = json.dumps(payload)

def callAPI(x):
    session = requests.Session()
    r = session.post(url, json=json.loads(pl))
    response = r.json()
    return response

if __name__ == '__main__':
    Pool(processes=15).map(callAPI, range(5))
Error
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\HZ\Anaconda3\lib\multiprocessing\pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "C:\Users\HZ\Anaconda3\lib\multiprocessing\pool.py", line 44, in mapstar
return list(map(*args))
File "C:\Models\CallThreadsCheck.py", line 40, in callAPI
response = r.json()
File "C:\Users\HZ\Anaconda3\lib\site-packages\requests\models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Users\HZ\Anaconda3\lib\site-packages\simplejson\__init__.py", line 525, in loads
return _default_decoder.decode(s)
File "C:\Users\HZ\Anaconda3\lib\site-packages\simplejson\decoder.py", line 370, in decode
obj, end = self.raw_decode(s)
File "C:\Users\HZ\Anaconda3\lib\site-packages\simplejson\decoder.py", line 400, in raw_decode
return self.scan_once(s, idx=_w(s, idx).end())
simplejson.errors.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "CallThreadsCheck.py", line 49, in <module>
p.map(callAPI, range(calls))
File "C:\Users\HZ\Anaconda3\lib\multiprocessing\pool.py", line 268, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Users\HZ\Anaconda3\lib\multiprocessing\pool.py", line 657, in get
raise self._value
simplejson.errors.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
May I know what this error is about, please?
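For debugging, a hedged sketch of a callAPI variant that inspects the raw response before decoding it; a JSONDecodeError at char 0 usually means the body was empty or not JSON (for example an error page), so surfacing the status code and text narrows it down. The function is meant to drop into the same script and reuses url, pl, and json from there:

def callAPI_debug(x):
    # Hedged sketch: report non-JSON responses instead of crashing on r.json().
    r = requests.post(url, json=json.loads(pl), timeout=30)
    if r.status_code != 200:
        return {"error": r.status_code, "body": r.text[:200]}
    try:
        return r.json()
    except ValueError:  # both json and simplejson decode errors derive from ValueError
        return {"error": "non-JSON body", "body": r.text[:200]}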
I'm running through a list of locations and trying to find places along my route. This is my first attempt at threading, so any tips would be appreciated! When I run this, it works fine for the first few iterations, but then I start getting a KeyError, and the API response says the route is not found (even though it should be). If I search along a shorter route, everything runs fine. When I extend the route past a couple of hours of drive time, I start getting these errors. Is it possible that I'm overloading it, or does my code look off?
import pandas as pd
from threading import Thread
import threading
import requests
import datetime
import time

start_input = input("start: ")
end_input = input("end: ")
out_way = input("out of the way: ")
out_way_secs = int(out_way) * 60

thread_local = threading.local()

def get_session():
    if not getattr(thread_local, "session", None):
        thread_local.session = requests.Session()
    return thread_local.session

def get_routes(url, start, end, waypoint, idx):
    session = get_session()
    with session.get(url, params={'origins': f'{start}|{waypoint}', 'destinations': f'{start}|{end}',
                                  'key': '# key'}) as response:
        route = response.json()

    if route['rows'][1]['elements'][0]['status'] != 'OK':
        results[idx] = {'# info'}
    else:
        nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
        leg1 = route['rows'][1]['elements'][0]['duration']['value']
        leg2 = route['rows'][1]['elements'][1]['duration']['value']
        time_added = (leg1 + leg2) - nonstop_route
        time_added_mins = str(datetime.timedelta(seconds=(leg1 + leg2) - nonstop_route))
        more_time = time_added_mins.split(':')
        added_time_str = str(f'{more_time[0]}:{more_time[1]}:{more_time[2]} away!')
        if time_added < allowable_time:
            results[idx] = {# info to return}
    return results[idx]

if __name__ == "__main__":
    start_time = time.time()
    output_df = pd.DataFrame(columns=['Location', 'Added Time', 'Notes'])

    threads = [None] * coords[0]
    results = [None] * coords[0]

    for i in range(len(threads)):
        threads[i] = Thread(target=get_routes, args=('https://maps.googleapis.com/maps/api/distancematrix/json',
                                                     start_input, end_input, stops[i], i))
        threads[i].start()

    for i in range(len(threads)):
        threads[i].join()

    for x in range(len(results)):
        output_df = output_df.append(results[x], ignore_index=True)

    output_df = output_df.sort_values(['Added Time'], ascending=True)
    output_df.to_csv('output.csv', index=False)
There are three errors I get: the first one shows up by itself, and the last two come together. The code is the same each time I run it, so I'm not sure why I get different errors.
This is the most common error, and it comes by itself (the routing duration works fine when the request is run individually):
Exception in thread Thread-171:
Traceback (most recent call last):
File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:program.py", line 46, in get_routes
nonstop_route = route['rows'][0]['elements'][1]['duration']['value']
KeyError: 'duration'
The two errors below come together and are less common:
Exception in thread Thread-436:
Traceback (most recent call last):
File "C:\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:/program.py", line 40, in get_routes
route = response.json()
File "C:\requests\models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Python37-32\lib\json\__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "C:\Python37-32\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python37-32\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
second error:
Exception in thread Thread-196:
Traceback (most recent call last):
File "C:\site-packages\urllib3\response.py", line 360, in _error_catcher
yield
File "C:\urllib3\response.py", line 442, in read
data = self._fp.read(amt)
File "C:\Python37-32\lib\http\client.py", line 447, in read
n = self.readinto(b)
File "C:\Python37-32\lib\http\client.py", line 491, in readinto
n = self.fp.readinto(b)
File "C:\Python37-32\lib\socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "C:\Python37-32\lib\ssl.py", line 1052, in recv_into
return self.read(nbytes, buffer)
File "C:\Python37-32\lib\ssl.py", line 911, in read
return self._sslobj.read(len, buffer)
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\site-packages\requests\models.py", line 750, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "C:\site-packages\urllib3\response.py", line 494, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "C:\site-packages\urllib3\response.py", line 459, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "C:\Python37-32\lib\contextlib.py", line 130, in __exit__
self.gen.throw(type, value, traceback)
File "C:\site-packages\urllib3\response.py", line 378, in _error_catcher
raise ProtocolError('Connection broken: %r' % e, e)
urllib3.exceptions.ProtocolError: ("Connection broken: ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None)", ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))
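Two things a hedged sketch can illustrate here: distance-matrix elements whose status is not OK (for example NOT_FOUND or ZERO_RESULTS) carry no 'duration' key, which is where the KeyError comes from, and bounding the number of in-flight requests with a thread pool avoids opening one connection per stop. Names and the max_workers value are illustrative assumptions:

from concurrent.futures import ThreadPoolExecutor
import requests

URL = "https://maps.googleapis.com/maps/api/distancematrix/json"

def fetch_route(params):
    # One short-lived session per call keeps this sketch simple and thread-safe.
    with requests.Session() as session:
        data = session.get(URL, params=params, timeout=30).json()
    elements = [el for row in data["rows"] for el in row["elements"]]
    # Guard every element: non-OK elements have no 'duration' field at all.
    if any(el.get("status") != "OK" for el in elements):
        return None
    return [el["duration"]["value"] for el in elements]

# Usage sketch: max_workers bounds concurrency instead of one thread per waypoint.
# with ThreadPoolExecutor(max_workers=8) as executor:
#     results = list(executor.map(fetch_route, list_of_param_dicts))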
I would like to print out tweets that have the #Berlin hashtag in them. How can I rewrite the code? I can't find sample code for Python 3 for this.
I have the following problem:
from tweepy.streaming import StreamListener
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler

consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

# This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
    def on_data(self, data):
        print(data)
        return True

    def on_error(self, status):
        print(status)

if __name__ == '__main__':
    # This handles Twitter authentication and the connection to the Twitter Streaming API
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)

    # This line filters Twitter Streams to capture data by the keyword: 'Berlin'
    stream.filter(track=['Berlin'])
And then I got this error at the end:
Traceback (most recent call last):
File "test.py", line 31, in <module>
stream.filter(track=['Berlin'])
File "/home/ubuntu/tweepy/tweepy/streaming.py", line 430, in filter
self._start(async)
File "/home/ubuntu/tweepy/tweepy/streaming.py", line 346, in _start
self._run()
File "/home/ubuntu/tweepy/tweepy/streaming.py", line 286, in _run
raise exception
File "/home/ubuntu/tweepy/tweepy/streaming.py", line 255, in _run
self._read_loop(resp)
File "/home/ubuntu/tweepy/tweepy/streaming.py", line 298, in _read_loop
line = buf.read_line().strip()
File "/home/ubuntu/tweepy/tweepy/streaming.py", line 171, in read_line
self._buffer += self._stream.read(self._chunk_size)
TypeError: Can't convert 'bytes' object to str implicitly
This is related to a known tweepy bug, #615; the fix below is taken from a post in that issue.
In streaming.py:
I changed line 161 to
self._buffer += self._stream.read(read_len).decode('UTF-8', 'ignore')
and line 171 to
self._buffer += self._stream.read(self._chunk_size).decode('UTF-8', 'ignore')
The file you need to change on Windows is located under \Python 3.5\Lib\site-packages\tweepy.
On Ubuntu it is under '/usr/lib/python3.5/dist-packages/tweepy'.
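To come back to the original #Berlin question, a hedged sketch of a listener that prints only tweets whose entities actually contain the hashtag (track=['Berlin'] also matches plain mentions of the word); it assumes the classic v1.1 streaming payload with an entities.hashtags list:

import json
from tweepy.streaming import StreamListener

class HashtagListener(StreamListener):
    # Hedged sketch: decode the payload and print only tweets tagged #Berlin.
    def on_data(self, data):
        tweet = json.loads(data)
        hashtags = [h["text"].lower()
                    for h in tweet.get("entities", {}).get("hashtags", [])]
        if "berlin" in hashtags:
            print(tweet.get("text", ""))
        return True

    def on_error(self, status):
        print(status)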