I am running code that gets tweets from the Twitter API and saves them to a txt file. The code is as follows:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import sentmod as s

# consumer key, consumer secret, access token, access secret.
ckey = "xxxxxxxxxxxxxxxxxxx"
csecret = "xxxxxxxxxxxxxxxxxxx"
atoken = "xxxxxxxxxxxxxxxxxxx"
asecret = "xxxxxxxxxxxxxxxxxxx"

class listener(StreamListener):

    def on_data(self, data):
        all_data = json.loads(data)
        tweet = all_data["text"]
        sentiment_value, confidence = s.sentiment(tweet)
        tweet.encode('ascii', 'ignore')

        tweets = open("tweets.txt", "a", encoding="utf-8")
        tweets.write(tweet)
        tweets.write('\n\n\n')
        tweets.close()

        print(tweet, sentiment_value, confidence)

        if confidence * 100 >= 60:
            output = open("twitter-out.txt", "a")
            output.write(sentiment_value)
            output.write('\n\n\n')
            output.close()

        return True

    def on_error(self, status):
        print(status)

auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)

twitterStream = Stream(auth, listener())
twitterStream.filter(track=["Car"], languages=['en'])  # locations=[]
When I write the tweets to the text file, some tweets get repeated. How can I fix that?
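One way to avoid the repeats (a sketch of my own, not code from the post) is to keep a set of tweet IDs you have already written and skip any message whose ID you have seen before. This assumes the stream payload carries the standard id_str field; seen_ids and write_tweet_once are names introduced here for illustration:

seen_ids = set()

def write_tweet_once(all_data, tweet):
    # Skip anything without an ID, and anything already written this run.
    tweet_id = all_data.get("id_str")
    if tweet_id is None or tweet_id in seen_ids:
        return
    seen_ids.add(tweet_id)
    with open("tweets.txt", "a", encoding="utf-8") as tweets:
        tweets.write(tweet)
        tweets.write('\n\n\n')

Note that the set only deduplicates within a single run; to survive restarts you would need to persist the seen IDs, for example to a file.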
I am trying to use Tweepy and streaming to track Tweets in real time. I am using the following, which works fine:
import tweepy
import configparser
import sys

# read configs
config = configparser.ConfigParser()
config.read('config.ini')

api_key = config['twitter']['api_key']
api_key_secret = config['twitter']['api_key_secret']
access_token = config['twitter']['access_token']
access_token_secret = config['twitter']['access_token_secret']

class StreamCollector(tweepy.Stream):

    def on_status(self, status):
        if not hasattr(status, 'retweeted_status') and status.in_reply_to_screen_name == None and status.is_quote_status == False:
            if status.author.followers_count > 100000:
                print('Twitter Handle: #'+status.author.screen_name)
                print('Followers:', status.author.followers_count)
                print('Tweet:', status.text)
                print('\n')
                # print(status.user.screen_name.encode('UTF-8'))

stream = StreamCollector(api_key, api_key_secret, access_token, access_token_secret)
stream.filter(track=["table"])
However, I want to produce the untruncated Tweet. I tried substituting status.full_text for status.text, but I got the error:
AttributeError: 'Status' object has no attribute 'full_text'
My version of Tweepy is 4.5.0 and Python is 3.9.9.
tweepy.API has a compatibility mode and an extended mode. The extended mode should allow you to get the full text of the Tweet.
ref: Extended Tweets
Here is the code with the extended mode call.
import sys
import tweepy
import configparser

# read configs
config = configparser.ConfigParser()
config.read('config.ini')

api_key = config['twitter']['api_key']
api_key_secret = config['twitter']['api_key_secret']
access_token = config['twitter']['access_token']
access_token_secret = config['twitter']['access_token_secret']

class StreamCollector(tweepy.Stream):

    def on_status(self, status):
        if not hasattr(status, 'retweeted_status'):
            if status.in_reply_to_screen_name is None and status.is_quote_status is False:
                if status.author.followers_count > 100000:
                    print(f'Twitter Handle: #{status.author.screen_name}')
                    print(f'Followers: {status.author.followers_count}')
                    if 'extended_tweet' in status._json:
                        full_text = status._json['extended_tweet']['full_text']
                        print(f'Tweet: {full_text}')
                    else:
                        print(f'Tweet: {status.text}')
                    print('\n')

stream = StreamCollector(api_key, api_key_secret, access_token, access_token_secret)
stream.filter(track=["table"])
Streaming is covered in Tweepy's documentation on extended Tweets:
By default, the Status objects from streams may contain an extended_tweet attribute representing the equivalent field in the raw data/payload for the Tweet. This attribute/field will only exist for extended Tweets, containing a dictionary of sub-fields. The full_text sub-field/key of this dictionary will contain the full, untruncated text of the Tweet.
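Since the documentation above says the extended_tweet attribute lives on the Status object itself, you can also read it without touching status._json. A small alternative sketch (my own; get_full_text is an illustrative name):

def get_full_text(status):
    # Extended Tweets carry an 'extended_tweet' dict holding the untruncated text;
    # fall back to status.text for Tweets that were never truncated.
    if hasattr(status, 'extended_tweet'):
        return status.extended_tweet['full_text']
    return status.text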
I am trying to take a list of video IDs and extract the comments from those IDs into a list. I am having trouble figuring out a way to loop through all of the video IDs (I have been able to get one video, but it stops there).
Currently, I get an error saying that it can't find the videoId (it is not being passed):
import os
import googleapiclient.discovery
import sys
import csv
import re
import json
import pandas as pd
from datetime import datetime
from datetime import timedelta

api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = "CENSORED"

youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=DEVELOPER_KEY)

# Calculating timestamp for 2 weeks before this script is run
now = datetime.now()
weeks_calculated = now + timedelta(weeks=-2)

# Passing parameters to API to retrieve video ids for the past 2 weeks
def get_id(youtube, channelId):
    response = youtube.search().list(
        part="snippet",
        type='video',
        channelId=channelId,
        publishedAfter='{}'.format(weeks_calculated.isoformat("T") + "Z")
        #publishedAfter="2021-08-10T00:00:00Z"
        #order="time",
        #pageToken=pageToken
        #maxResults=maxResults
    ).execute()
    return response

# Passing parameters to API to retrieve comments by video id
def get_comments(youtube, videoId):
    response = youtube.commentThreads().list(
        part="snippet,replies",
        videoId=videoId,
        order="time",
        #pageToken=pageToken,
        textFormat="plainText",
        moderationStatus="published",
        #maxResults=maxResults
    ).execute()
    return response

comment_text = []
video_id = []

def get_channel_data():
    channels = [{"id": "UCQlVOYJyQp64rA12ac0mv6g"}]
    for channel in channels:
        video_data = get_id(youtube, channel['id'])
        for i in video_data['items']:
            videoData = i['id']['videoId']
            video_id.append(videoData)
    return video_id

def get_comment_data():
    videoId = get_channel_data()
    videoId = videoId
    response = youtube.commentThreads().list(
        part="snippet,replies",
        videoId=videoId,
        order="time",
        textFormat="plainText",
        moderationStatus="published"
        #maxResults=maxResults
    ).execute()
    while response:
        for videoId[0] in response:
            for item in response['items']:
                original = item['snippet']['topLevelComment']['snippet']['textOriginal']
                comment_text.append(original)
        return comment_text

get_comment_data()
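A likely cause of the error (my reading, not stated in the post): get_channel_data() returns a list of IDs, but commentThreads().list() expects a single videoId string, and for videoId[0] in response: iterates over the response's top-level keys rather than the video IDs. A minimal sketch that loops one ID at a time, reusing the get_comments and get_channel_data helpers defined above:

def get_comment_data():
    all_comments = []
    for videoId in get_channel_data():
        # One API call per video; videoId must be a single ID string here.
        response = get_comments(youtube, videoId)
        for item in response['items']:
            original = item['snippet']['topLevelComment']['snippet']['textOriginal']
            all_comments.append(original)
    return all_comments

(Videos with comments disabled will make commentThreads().list() raise an error, so you may want to wrap the call in a try/except.)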
I am using Python and Tweepy in a script that loops and prints the tweet ID, the time it was created, and the full text. The problem I am having is that it keeps printing the same ID and full text if there is no new tweet. Is it possible to have an if statement or a listener that will print the ID, creation time, and full text only once, and then wait until the user creates a new tweet?
Below is my code:
import tweepy
import tkinter
import time

CONSUMER_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
CONSUMER_SECRET = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
ACCESS_TOKEN = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
ACCESS_TOKEN_SECRET = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
# set parser=tweepy.parsers.JSONParser() if you want a nice printed json response.

user = api.me()

while True:
    tweets = api.user_timeline(screen_name="elonmusk",
                               count=1,
                               include_rts=False,
                               tweet_mode='extended')

    for detailsInTweet in tweets:
        print("ID: {}".format(detailsInTweet.id))
        # Tweet creation time
        print(detailsInTweet.created_at, "\n")
        # Full tweet text
        print(detailsInTweet.full_text, "\n")
        print("\n")

    time.sleep(5)
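One way to print each tweet only once (a sketch of my own, not from the post) is to remember the ID of the last tweet printed and pass it back as user_timeline's since_id parameter, which asks Twitter to return only newer tweets. Assuming the same api object as above:

last_seen_id = None

while True:
    kwargs = dict(screen_name="elonmusk", count=1,
                  include_rts=False, tweet_mode='extended')
    if last_seen_id is not None:
        kwargs['since_id'] = last_seen_id  # only tweets newer than the last printed one
    tweets = api.user_timeline(**kwargs)
    for tweet in tweets:
        print("ID: {}".format(tweet.id))
        print(tweet.created_at, "\n")
        print(tweet.full_text, "\n")
        last_seen_id = tweet.id  # remember it so it is not printed again
    time.sleep(5)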
I learned that Twitter has stopped providing JSON for deleted tweets. I am trying to get past this limitation by using a polling method to check whether a tweet has been deleted.
But my code still fails. I would appreciate it if you could help me figure out what I am missing.
import sys
import json
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
import datetime
import time
from polling import TimeoutException, poll

# Go to http://apps.twitter.com and create an app.
# The consumer key and secret will be generated for you after
consumer_key = 'xx'
consumer_secret = 'xx'
access_token = 'xx'
access_token_secret = 'xx'

# Set up the authorisation to use the Twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

api = tweepy.API(auth)

# Handle the output generated from the stream
class listener(StreamListener):
    tweetcount = 0

    def on_data(self, data):
        # Convert the message to JSON
        json_data = json.loads(data)

        # text_file = open(json_data['id_str'] + ".json", "w")
        # text_file.write(data)
        # text_file.close()

        if 'id_str' not in json_data:
            # If this isn't a status, do nothing.
            print("no ID")
        else:
            #print("Twitter Id ", json_data['id_str'])
            #print("User Id ", json_data['user']['id_str'])
            if json_data['user']['id_str'] == '51241574':  # Associated Press
                tweetcount = json_data['user']['statuses_count']
                tweet = api.get_status(json_data['id'])
                print("Tweet Count ", tweetcount)
                print("Account Name ", json_data['user']['name'])
                print(tweet.text)
            else:
                pass

        # if 'delete' in json_data:
        #     print("DELETED!")
        #     if json_data['delete']['status']['user_id'] == '51241574':
        #         deleted_tweet_id = json_data['delete']['status']['id']
        #         tweetcount -= 1
        #         print("New Count is ", tweetcount)
        #         print(deleted_tweet_id)
        #         deleted_tweet = api.get_status(deleted_tweet_id)
        #         print(deleted_tweet.text)
        #     else:
        #         pass

        return True

    def on_error(self, status):
        print("Error status is ", status)

# Start consuming from the stream. This will get all the Tweets & Deletions from the users the user is following.
twitterStream = Stream(auth, listener())
twitterStream.filter(follow=['51241574'], async=True)

# polling method to check if tweet is deleted
try:
    user = api.get_user('AP')
    poll(lambda: user.statuses_count >= listener.tweetcount > 0, timeout=30, step=1)
    print("Tweet was deleted, New Tweet count is ", user.statuses_count)
except Exception as ex:
    template = "An exception of type {0} occurred. Arguments:\n{1!r}"
    message = template.format(type(ex).__name__, ex.args)
    print(message)
When a listener event fires, the application shows the value of the tweet count variable and checks it against the value retrieved by querying the API.
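Two things stand out (my observations, not from the original post). First, inside on_data the line tweetcount = json_data['user']['statuses_count'] binds a local variable, so the class attribute listener.tweetcount that the polling lambda reads stays at 0. Second, user.statuses_count is a snapshot taken by a single get_user call, so the poll condition never sees updated data. A minimal sketch of both fixes, reusing the listener class and api object above:

# Inside on_data: assign to the class attribute, not a local variable,
# so the poller outside the stream can observe it.
listener.tweetcount = json_data['user']['statuses_count']

# In the polling step: re-fetch the user on every check so statuses_count is fresh.
# (Keeping the original comparison; to detect a deletion you may actually want '<',
# since a drop below the recorded count is what signals a removed tweet.)
poll(lambda: api.get_user('AP').statuses_count >= listener.tweetcount > 0,
     timeout=30, step=1)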
I am trying to run the Twitter Streaming API to collect only tweets from a certain country.
Trying to write long/lat coordinates as a filter gives the syntax error:
positional argument follows keyword argument
I'm willing to use the geotag to filter by country or city, but have no idea what needs to be written into the code to do that.
A solution to stream only tweets from an approximate location would be great.
# Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

# Variables that contain the user credentials to access Twitter API
access_token = "put your keys here"
access_token_secret = "put your keys here"
consumer_key = "put your keys here"
consumer_secret = "put your keys here"

# This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):

    def on_data(self, data):
        print(data)
        return True

    def on_error(self, status):
        print(status)

if __name__ == '__main__':
    # This handles Twitter authentication and the connection to the Twitter Streaming API
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)
You need to supply the bounding box for your location, given as longitude/latitude pairs: the south-west corner first, then the north-east corner. Something like this:
stream.filter(locations=[-74,40,-73,41])
Here is Twitter's documentation.
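Putting it together with the StdOutListener above (not from the original answer; the coordinates below are a rough approximation of a bounding box around the United Kingdom, so look up precise values for your own country):

if __name__ == '__main__':
    l = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, l)
    # locations takes [sw_lng, sw_lat, ne_lng, ne_lat]; this box roughly covers the UK.
    stream.filter(locations=[-11.0, 49.5, 2.0, 61.0])

Note that only tweets that actually carry location data (a geotag or an attached place) will match a locations filter.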