I'm making a bot that downloads posts from a subreddit I selected, but I only want to download photos. My code already makes sure that the post is not text-only, but I'm not able to check whether the post is a video or not. If the post is a video, the program should just skip it.
Here is my code so far:
from InstagramAPI import InstagramAPI
import praw
import requests
import urllib.request
import time
import keyboard
from PIL import Image
import math
# Make a reddit account and look up how to find these credentials; the library is called PRAW.
# NOTE(review): client_id/client_secret are placeholders here; username/password are
# empty, so this presumably runs read-only -- fine for downloading, verify before posting.
reddit = praw.Reddit(client_id='***',
client_secret='***',
username='',
password='',
user_agent='chrome')
def DLimage(url, filePath, fileName):
    """Download the image at *url* and store it as <filePath><fileName>.jpg."""
    destination = "{}{}.jpg".format(filePath, fileName)
    urllib.request.urlretrieve(url, destination)
# Folder path to store downloaded images.
filePath = "/Users/***/AppBot/WTF/"
subreddit = reddit.subreddit('videos') #subreddit to take images from
waitTime = 2 #delay in seconds between reddit calls, to prevent reddit bad-gateway errors. Don't change.
numRounds = 100 #how many posts
postFrequency = 600 # how often to post, in seconds
numPics = 100 #how many pics per post
for x in range(numRounds):
    new_memes = subreddit.top('all')  # .hot/.rising/.new reddit sorting algorithm
    authors = []
    photoAlbum = []
    print("Round/post number:", x)
    for subbmission in new_memes:
        # Skip anything that is not a plain image post:
        #  - is_video is True for reddit-hosted videos (the case the bot must skip)
        #  - text/self posts have no 'preview' attribute at all, so checking
        #    `subbmission.preview == True` would raise AttributeError on them.
        if subbmission.is_video or not hasattr(subbmission, 'preview'):
            continue
        url = subbmission.url
        time.sleep(waitTime)
        fileName = str(subbmission)
        fullPath = filePath + fileName + '.jpg'
        time.sleep(waitTime)
        try:
            DLimage(url, filePath, fileName)
        except Exception:
            # Narrowed from a bare except so Ctrl-C still stops the bot;
            # any download failure just moves on to the next post.
            print("scratch that, next post.")
            continue
        time.sleep(waitTime)
        # Re-save as RGB JPEG so every stored file has a consistent format.
        img = Image.open(fullPath)
        img = img.convert("RGB")
        img.save(fullPath)
        time.sleep(postFrequency)
Related
I'm a python newbie who has been assigned the task of downloading and storing locally at least 100 to 200 photographs preferably in .jpg format. The code was provided to me, but thus far I haven't been able to get it to work. The code goes to https://www.instagram.com/explore/tags/feetphotos to get the photographs.
I've created an account to access the images. The code creates a insta_foot.csv file which contains an index and a link to the photograph itself to be downloaded. The .csv file gets created sometimes with the indices and links sometimes without the same. The last part of the code downloads the photographs from each of the links into an images directory created locally by the script.
import time
import pandas as pd
import requests
import bs4 as bs
from selenium import webdriver
driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver")
url = 'https://www.instagram.com/explore/tags/foot/'
driver.get(url)

# One column per thumbnail size offered in the <img srcset> attribute.
img_sizes = ['150w', '240w', '320w', '480w', '640w']
df = pd.DataFrame(columns=img_sizes)

last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    # Parse whatever is currently rendered and harvest every photo's srcset links.
    el = driver.find_element_by_tag_name('body')
    soup = bs.BeautifulSoup(el.get_attribute('innerHTML'), 'lxml')
    for t in soup.findAll('img', {"class": "FFVAD"}):
        a_series = pd.Series(
            ['https://' + s.split(' ')[0]
             for s in t['srcset'].split('https://')[1:]],
            index=df.columns)
        # NOTE(review): DataFrame.append was removed in pandas >= 2.0;
        # use pd.concat([df, a_series.to_frame().T]) there.
        df = df.append(a_series, ignore_index=True)
    df.drop_duplicates(inplace=True)
    print('last_height: ', last_height, ' links: ', len(df))
    # Scroll to the bottom to trigger lazy loading; stop once the page height
    # stops growing (no more content).
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

df.to_csv('insta_foot.csv')

import os
# BUG FIX: open(n, "wb") below fails with FileNotFoundError unless the
# images/ directory already exists.
os.makedirs('images', exist_ok=True)

size = '640w'
for i, row in df.iterrows():
    link = row[size]
    # BUG FIX: the original split this expression over two lines with no
    # parentheses or backslash continuation, which is a SyntaxError.
    n = ('images/'
         + [e for e in link.split('/') if '.jpg' in e][0].split('.jpg')[0]
         + '_' + size + '.jpg')
    with open(n, "wb") as f:
        f.write(requests.get(link).content)
    print('index: ', i)

driver.close()
When using my code below (it turns YouTube videos into ASCII with audio), the latency between the audio and video grows bigger each frame (I have tried many different wait times). I was wondering if there is a way to change the code so that the wait key adjusts depending on how much latency there is. I have only been coding for 6 months, so sorry if there is any bad code.
import pytube
import os
import cv2
import PIL.Image
import winsound
from moviepy.editor import *
from pydub import AudioSegment
import threading
import time
##################################################
# Downloads the youtube video and lets you input the path where it should be saved.
url = input ("Enter the you youtube url: \n\n")
path = input ("Enter the path where you want the youtube video to be saved: \n\n")
try:
    youtube = pytube.YouTube(url)
    # NOTE(review): streams.all() is fetched but never used (and is deprecated
    # in newer pytube releases).
    streams = youtube.streams.all()
    video = youtube.streams.get_highest_resolution()
    video.download(path)
    print ("Done!")
except:
    # NOTE(review): bare except -- ANY failure (typo in the URL, network error,
    # pytube API change) is reported as a copyright problem, which can mislead.
    print ("\nYoutube video has coppy righted material so it can not be downloaded. Try again with a different video")
##################################################
# Locate all files with the .mp4 extension under the chosen path; after the
# loop, file_name/file_path refer to the LAST .mp4 found.
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith(".mp4"):
            file_name = (file)
            file_path = (os.path.join(root,file))
            print (file_name)
            print (file_path)
##################################################
# Extract the audio track: mp4 -> mp3 via moviepy, then mp3 -> wav via pydub,
# because winsound.PlaySound (used later) only accepts WAV files.
# NOTE(review): the "\\" separator assumes Windows paths -- confirm.
mp3_file = (path+"\\")
mp3_file = (mp3_file+"audio.mp3")
mp4_file = (path+"\\")
mp4_file = (mp4_file+file_name)
VideoClip = VideoFileClip(mp4_file)
audioclip = VideoClip.audio
audioclip.write_audiofile(mp3_file)
audioclip.close()
VideoClip.close()
sound = AudioSegment.from_mp3(mp3_file)
sound.export(path+"/audio.wav", format = "wav")
##################################################
def a():
    """Render the downloaded video as ASCII art in the terminal, frame by frame."""
    # BUG FIX: sys.stdout.write is used below, but `sys` was never imported
    # anywhere in the script; import it locally so the function works.
    import sys

    # Ascii characters used to create the output (darkest to lightest).
    ASCII_CHARS = ["#", "#", "S", "%", "?", "*", "+", ";", ":", ",", "."]

    def resized_gray_image(image, new_width=80):
        # Scale to new_width, keep aspect ratio, convert to 8-bit grayscale.
        width, height = image.size
        aspect_ratio = height / width
        new_height = int(aspect_ratio * new_width)
        return image.resize((new_width, new_height)).convert('L')

    def pix2chars(image):
        # Map each grayscale pixel (0-255) onto one of the 11 ASCII chars.
        pixels = image.getdata()
        return "".join(ASCII_CHARS[pixel // 25] for pixel in pixels)

    def generate_frame(image, new_width=80):
        # Chop the flat character string into rows of new_width characters.
        new_image_data = pix2chars(resized_gray_image(image))
        total_pixels = len(new_image_data)
        ascii_image = "\n".join(
            new_image_data[index:index + new_width]
            for index in range(0, total_pixels, new_width))
        sys.stdout.write(ascii_image)
        os.system('cls' if os.name == 'nt' else 'clear')

    cap = cv2.VideoCapture(mp4_file)
    print(cap)
    try:
        while True:
            ret, frame = cap.read()
            cv2.imshow("frame", frame)
            generate_frame(PIL.Image.fromarray(frame))
            cv2.waitKey(1)
    except Exception:
        # Narrowed from a bare except. When the video ends, cap.read() returns
        # frame=None and fromarray raises -- that is what ends the loop here,
        # after which cleanup (c) runs on its own thread.
        threading.Thread(target=c).start()
##################################################
def b():
    # Play the extracted WAV file; SND_FILENAME blocks until playback finishes.
    # NOTE(review): winsound is Windows-only -- confirm target platform.
    winsound.PlaySound(path+"/audio.wav",winsound.SND_FILENAME)
##################################################
def c():
    """Delete the temporary media files created earlier in the script."""
    for temp_file in (mp3_file, mp4_file, path + "/audio.wav"):
        os.remove(temp_file)

# Run the ASCII renderer and the audio player concurrently.
threading.Thread(target=a).start()
threading.Thread(target=b).start()
I am trying to download all the images from the website but have been unable to do so. How can I download all the images from a specific section of a website and save them to my directory?
The below code exports all the image and saves the image link to a csv file, but I also want the image to save it in my directory also.
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup as soup

my_url = 'https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38?Tpk=graphics%20card'

# Newegg rejects the default urllib user agent, so present a browser one.
req = Request(my_url, headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read()
page_soup = soup(webpage, "html.parser")

filename = "abc.csv"
headers = "imagelink\n"

# Drill down to the product-image containers of the category listing.
snackcrisps = page_soup.findAll("div",{"class":"divCategories divShops-newegg"})
crispitem = snackcrisps[0]
img = crispitem.findAll("div",{"class":"product_image_div productSmall_image_div_lit"})

# FIX: manage the CSV with a context manager so it is closed even if a row
# fails mid-write; also dropped the dead `img1 = img[0]` assignment, whose
# value was immediately overwritten by the for-loop target.
with open(filename, "w") as f:
    f.write(headers)
    for img1 in img:
        img2 = img1.findAll('img')
        imageLink = img2[0].get('src')
        print("imageLink: " + imageLink)
        f.write(imageLink + "\n")
How can I save the images in my local directory? Help needed!!
Many Thanks
I used the response to this post to formulate my answer.
First you need to build the full URL for the image you want. This could be as simple as appending "https:" to the beginning of the image link, or not changing the value at all. You'll have to investigate (review this post) how to adjust the URLs you find based on whether or not they are relative or absolute.
You'll want to use the requests module to make the request for the image.
import requests
import shutil

# Stream each image into its own numbered file; the original wrote every
# download to "my_file.jpg", so each image overwrote the previous one.
for index, img1 in enumerate(img):
    img2 = img1.findAll('img')
    imageLink = img2[0].get('src')
    # Protocol-relative links ("//host/...") need an explicit scheme.
    if not "https:" in imageLink:
        imageLink = "https:" + imageLink
    r = requests.get(imageLink, stream=True)
    # BUG FIX: the original read `if r.response = 200:` -- a SyntaxError, and
    # requests exposes the HTTP code as `status_code`, not `response`.
    if r.status_code == 200:
        with open("my_file_%d.jpg" % index, 'wb') as f:
            # Let urllib3 decompress gzip/deflate content while copying.
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, f)
I want to write a Telegram bot that saves photos.
This is my code, but it's not working,
and I don't know what the problem is.
def image_handler(bot, update):
    # Save an incoming photo as image.jpg.
    # NOTE(review): update.message.photo is a *list* of PhotoSize objects, so
    # `.file_id` on the list itself fails -- index it (photo[-1] gives the
    # largest size), as the answer below this question explains.
    file = bot.getFile(update.message.photo.file_id)
    print ("file_id: " + str(update.message.photo.file_id))
    file.download('image.jpg')

# Register the handler for photo messages and start long-polling for updates.
updater.dispatcher.add_handler(MessageHandler(Filters.photo, image_handler))
updater.start_polling()
updater.idle()
Please help me solve my problem.
update.message.photo is an array of photos sizes (PhotoSize objects).
Use file = bot.getFile(update.message.photo[-1].file_id). This will get the image with biggest size available.
Here is my code
from telegram.ext import *
import telegram
def start_command(update, context):
    """Greet the sender by first name, then prompt them to share an image."""
    first_name = update.message.chat.first_name
    for line in ("Hello " + first_name, "Please share your image"):
        update.message.reply_text(line)
def image_handler(update, context):
    """Download the first PhotoSize of an incoming photo, then confirm receipt."""
    first_size = update.message.photo[0]
    telegram_file = context.bot.get_file(first_size.file_id)
    telegram_file.download()
    update.message.reply_text("Image received")
def main():
    """Build the Updater, register the command/photo handlers, and poll forever."""
    print("Started")
    TOKEN = "your-token"
    updater = Updater(TOKEN, use_context = True)
    dispatcher = updater.dispatcher
    for handler in (CommandHandler("start", start_command),
                    MessageHandler(Filters.photo, image_handler)):
        dispatcher.add_handler(handler)
    updater.start_polling()
    updater.idle()

if __name__ == '__main__':
    main()
Unlike the accepted answer suggests, you don't actually need the bot object to get the file:
file = update.message.photo[-1].get_file()
and then download the file:
path = file.download("output.jpg")
to use it for further processing or just have it on your device : )
Here is a vanilla python solution:
import requests
from PIL import Image

# For example, take the last update from getUpdates:
# update = requests.post(f'https://api.telegram.org/bot{TOKEN}/getUpdates').json()['result'][-1]
msg = update['message']

# BUG FIX: the original used a bare `return` at module level, which is a
# SyntaxError outside a function; guard with an if-block instead. Truthiness
# also replaces the `msg.get('photo', None) == None` anti-idiom.
if msg.get('photo'):
    # Photo sizes are ordered smallest -> largest, so [-1] is the biggest.
    file_id = msg['photo'][-1]['file_id']
    # Resolve the file_id to a downloadable path via the getFile method.
    file_path = requests.get(
        f'https://api.telegram.org/bot{TOKEN}/getFile?file_id={file_id}'
    ).json()['result']['file_path']
    # Stream the raw bytes straight into Pillow.
    img = Image.open(requests.get(
        f'https://api.telegram.org/file/bot{TOKEN}/{file_path}', stream=True).raw)
    # Save to disk if needed.
    img.save('photo.jpg')
@dp.message_handler(commands='start')
async def s_photo(message: types.Message):
    """Download the requester's first profile photo (smallest available size)."""
    # BUG FIX: the pasted original had editor line numbers ("9 ", "11photostr")
    # fused into the code, making it un-runnable; they are removed here, and
    # the bare-string "comments" are turned into real comments.
    # get_user_profile_photos returns an object; as_json() yields its JSON string
    photostr = await bot.get_user_profile_photos(message.from_user.id)
    # parse the JSON string back into a dict
    photojson = json.loads(photostr.as_json())
    # photo[0][0] is the first photo's smallest PhotoSize; hand its id to get_file
    photo = await bot.get_file(photojson['photo'][0][0]['file_id'])
    # finish by downloading the object with the download method
    downloadphoto = await photo.download('filename'+'.jpeg')
I cannot understand why this error happens.
First, I wrote:
import urllib.request
from bs4 import BeautifulSoup
import time
import os
def download_image(url,name):
    # Save the image at *url* as ./scrape_image/<name>.jpg.
    path = "./scrape_image/"
    imagename = str(name) + ".jpg"
    if not os.path.exists(path):
        os.makedirs(path)
        print(path)
        # BUG (the point of this question): this call sits INSIDE the
        # `if not os.path.exists(path)` block, so it only runs on the very
        # first call -- the one that creates the directory. Every later call
        # finds the directory already present and downloads nothing.
        urllib.request.urlretrieve(url,path+imagename)

url = "https://api.XXXkeyword=YYY&limit=1000"
response = urllib.request.urlopen(url)
rss = response.read().decode("utf-8")
# The API answers with XML; walk every <photo> entry it contains.
soup = BeautifulSoup(rss, "xml")
name=0
for s in soup.find_all("photo"):
    url = s.find_all("image_url")[0].string
    name+=1
    download_image(url, name)
By running this code, I can get only 1 image from the API, although the correct code should get 1000 images from the API. I fixed the indentation in the first code, so my code now looks like this:
import urllib.request
from bs4 import BeautifulSoup
import time
import os
def download_image(url, name):
    """Download the image at *url* into ./image/<name>.jpg, creating the folder on demand.

    Parameters:
        url:  direct link to the image.
        name: any value convertible to str; becomes the file's base name.
    """
    path = "./image/"
    imagename = str(name) + ".jpg"
    # exist_ok folds the separate exists() check and makedirs() call into one
    # race-free operation (the check-then-create pair had a TOCTOU window).
    os.makedirs(path, exist_ok=True)
    print(path)
    urllib.request.urlretrieve(url, path + imagename)
    # Throttle successive downloads so we don't hammer the API.
    time.sleep(1)
url = "https://api.XXXkeyword=YYY&limit=1000"
response = urllib.request.urlopen(url)
rss = response.read().decode("utf-8")
# The API answers with XML; parse it and walk every <photo> entry.
soup = BeautifulSoup(rss, "xml")
name = 0
for s in soup.find_all("photo"):
    # Each <photo> carries an <image_url> child holding the direct link.
    url = s.find_all("image_url")[0].string
    name+=1
    download_image(url,name)
At last, I can get 1000 images from the API. But I cannot understand why fixing the indentation made it work. Please give me an explanation.
Because in the first example you're only getting the image if your condition passes:
if not os.path.exists(path):
And that condition will only pass once because you immediately create the path:
os.makedirs(path)
For every other iteration of the loop, the condition is false. So the code within the conditional block doesn't execute.
Basically, an if block only executes if the condition is true. When you move your code out of the if block, it always executes regardless of the condition.