How to download bulk training phrases from DialogFlow into spreadsheet - dialogflow-es

I am a beginner with Dialogflow. I want to know if there is a way to download bulk training phrases from Dialogflow into a spreadsheet.

Dialogflow does not have a feature that bulk-downloads only the training phrases. You can submit a feature request for this, since it seems it would be really useful.
You can only get the training phrases by parsing the response of get_intent(). Using the Python code below, I generated a CSV file that contains the training phrases for each intent name you define in the code.
def list_training_phrases(project_id, intent_arr):
    """Returns all training phrases for a specified intent."""
    from google.cloud import dialogflow
    from google.protobuf.json_format import MessageToDict
    import csv

    # Create the intents client
    intent_client = dialogflow.IntentsClient()

    # The options for views of an intent
    intent_view = dialogflow.IntentView.INTENT_VIEW_FULL

    # Compose the get-intent request
    csv_arr = []
    for intent_name in intent_arr:
        get_intent_request = dialogflow.GetIntentRequest(
            name=intent_name, intent_view=intent_view
        )
        intent = intent_client.get_intent(get_intent_request)
        intent_dict = MessageToDict(intent._pb)  # convert response to dictionary

        phrases_arr = []
        fixed_arr = []
        # Iterate through the training phrases.
        for phrases in intent_dict["trainingPhrases"]:
            phrases_arr.append(phrases["parts"])
        for entry in phrases_arr:
            new_dict = merge_list_of_dictionaries(entry)
            fixed_arr.append(''.join(new_dict["text"]))
        print(fixed_arr)
        csv_arr.append(fixed_arr)

    with open('./train_phrases.csv', 'w', encoding='UTF8') as f:
        writer = csv.writer(f)
        for row in csv_arr:
            writer.writerow(row)
# [END dialogflow_list_training_phrases]

# https://stackoverflow.com/questions/45649141/combine-values-of-same-keys-in-a-list-of-dicts
def merge_list_of_dictionaries(dict_list):
    new_dict = {}
    for d in dict_list:
        for d_key in d:
            if d_key not in new_dict:
                new_dict[d_key] = []
            new_dict[d_key].append(d[d_key])
    return new_dict

def list_intents(project_id):
    from google.cloud import dialogflow

    intents_client = dialogflow.IntentsClient()
    parent = dialogflow.AgentsClient.agent_path(project_id)
    intents = intents_client.list_intents(request={"parent": parent})

    intent_arr = []
    intent_to_check = ["Make Appointment", "Hours"]  # define your intents here
    for intent in intents:
        if intent.display_name in intent_to_check:
            intent_arr.append(intent.name)

    list_training_phrases(project_id=project_id, intent_arr=intent_arr)

project_id = "your-project-id"
list_intents(project_id=project_id)
Each line of the output file (train_phrases.csv) holds the training phrases for one intent: line 1 corresponds to the "Hours" intent and line 2 to the "Make Appointment" intent.

Related

Python OneLogin: How to get all events using get_events?

I'm using Python 3 to get information about past events through the OneLogin API. I use the onelogin-python-sdk, which I got from GitHub.
I can get events with get_events. However, only the number of items specified by max_results is retrieved. What should I do to retrieve data that runs to tens of thousands of events?
Should I use another API?
import json
import csv
from optparse import OptionParser
from onelogin.api.client import OneLoginClient

query_parameters = {}

def get_options():
    # Analyze options
    return options

def format_eventdata(event_param):
    # Format data
    return event_data

def main():
    options = get_options()
    client = OneLoginClient(options.client_id, options.client_secret, 'US')
    events = client.get_events(query_parameters)
    with open(options.file, 'w', newline='') as f:
        writer = csv.writer(f)
        idx = 0
        if events:
            for data in events:
                eventdata = events[idx]
                csv_data = format_eventdata(eventdata)
                writer.writerow(csv_data)
                idx += 1
        else:
            print("end")

if __name__ == '__main__':
    main()
I found a solution myself: I adjusted the query parameters to reduce the amount of data retrieved at one time.
Thank you.
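For anyone who hits the same limit, one way to read that fix is to narrow the query window and walk through time ranges. The sketch below is only an illustration of that idea, not code from the answer: it assumes the Events API's documented since/until filters (ISO-8601 timestamps) and the same OneLoginClient constructor used in the question; the credentials and date windows are placeholders.
from onelogin.api.client import OneLoginClient

client = OneLoginClient('client_id', 'client_secret', 'US')

# Placeholder day-long windows; in practice you would generate these
# from the date range you actually need.
windows = [
    ('2019-01-01T00:00:00Z', '2019-01-02T00:00:00Z'),
    ('2019-01-02T00:00:00Z', '2019-01-03T00:00:00Z'),
]

all_events = []
for since, until in windows:
    # Assumed filters: the Events API accepts 'since' and 'until' query parameters.
    events = client.get_events({'since': since, 'until': until})
    if events:
        all_events.extend(events)

print(len(all_events))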

How to get maxresults for search from Youtube Data API v3?

I want to get as many results as possible for a particular YouTube search query. However, the maximum number of results that can be retrieved per request is 50. I know that nextPageToken can be used to retrieve the results of the next page. How do I modify the Python code below to achieve this?
#!/usr/bin/python
# original source example: https://developers.google.com/youtube/v3/docs/search/list
# assumes use of Python 3
# This sample executes a search request for the specified search term.
# Sample usage:
#   python search.py --q=surfing --max-results=10
# NOTE: To use the sample, you must provide a developer key obtained
# in the Google APIs Console. Search for "REPLACE_ME" in this code
# to find the correct place to provide that key.

import argparse

# library googleapiclient installed with: pip install --upgrade google-api-python-client
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Set DEVELOPER_KEY to the API key value from the APIs & auth > Registered apps
# tab of https://cloud.google.com/console
# Please ensure that you have enabled the YouTube Data API for your project.
DEVELOPER_KEY = 'KEY'
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

def youtube_search(query_term, max_results):
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                    developerKey=DEVELOPER_KEY)

    # Call the search.list method to retrieve results matching the specified
    # query term.
    search_response = youtube.search().list(
        q=query_term,
        part='id,snippet',
        type='video',
        relevanceLanguage='en',
        maxResults=max_results
    ).execute()

    video_ids = []
    # Add each result to the appropriate list, and then display the lists of
    # matching videos, channels, and playlists.
    for search_result in search_response.get('items', []):
        video_ids.append(search_result['id']['videoId'])
    return video_ids

if __name__ == '__main__':
    url_prefix = 'https://www.youtube.com/watch?v='
    query_terms = '"my_query"'
    max_results = 50
    try:
        ids = youtube_search(query_terms, max_results)
    except HttpError as e:
        print('An HTTP error %d occurred:\n%s' % (e.resp.status, e.content))
    else:
        with open('output.txt', 'w') as f:
            for i in ids:
                f.write(url_prefix + i + "\n")
Here's what needs to be added to keep fetching results until no nextPageToken is found:
nextPageToken = search_response.get('nextPageToken')
while 'nextPageToken' in search_response:
    nextPage = youtube.search().list(
        q=query_term,
        part='id,snippet',
        type='video',
        relevanceLanguage='en',
        maxResults=max_results,
        pageToken=nextPageToken
    ).execute()
    search_response['items'] = search_response['items'] + nextPage['items']

    if 'nextPageToken' not in nextPage:
        search_response.pop('nextPageToken', None)
    else:
        nextPageToken = nextPage['nextPageToken']
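For completeness, here is one way the paging could be folded into a single function that returns the IDs from every page rather than just the first. It is only a sketch under the same assumptions as the sample above (same DEVELOPER_KEY, build() client and search parameters); youtube_search_all is just an illustrative name.
def youtube_search_all(query_term, max_results=50):
    # Sketch: keep requesting pages until the response no longer
    # contains a nextPageToken, collecting video IDs along the way.
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                    developerKey=DEVELOPER_KEY)
    request_kwargs = dict(q=query_term, part='id,snippet', type='video',
                          relevanceLanguage='en', maxResults=max_results)
    video_ids = []
    response = youtube.search().list(**request_kwargs).execute()
    while True:
        for item in response.get('items', []):
            video_ids.append(item['id']['videoId'])
        token = response.get('nextPageToken')
        if not token:
            break
        response = youtube.search().list(pageToken=token, **request_kwargs).execute()
    return video_ids
Note that each extra page consumes additional API quota, so in practice you may want to cap the number of pages fetched.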

AttributeError: 'WebElement' object has no attribute 'extract_first'

I am trying to run the script below to extract the tags from a webpage and save them into a csv file.
In detail, I want to extract the tags associated with a class name.
However, I come across this error: AttributeError:
'WebElement' object has no attribute 'extract_first'.
The script is the following:
import csv
from selenium import webdriver
from time import sleep
from parsel import Selector
from selenium.webdriver.common.keys import Keys
from collections import defaultdict
from selenium.webdriver.support.select import Select

####### reading from the input file ##########
columns = defaultdict(list)  # each value in each column is appended to a list

# get the list of keywords from the csv file
with open('query.csv', 'r') as csvfile:
    reader = csv.DictReader(csvfile)  # read rows into a dictionary format
    for row in reader:  # read a row as {column1: value1, column2: value2,...}
        for (k, v) in row.items():  # go over each column name and value
            columns[k].append(v)  # append the value into the appropriate list

# the list containing all of the keywords
search_query_list = columns['Keyword']

########## start scraping ###############
rb_results = []

# create a driver and let it open google chrome
driver = webdriver.Chrome("chromedriver")
# get website
driver.get('https://www.redbubble.com/')
sleep(0.5)

for i in range(len(search_query_list)):
    next_query = search_query_list[i]
    # get RB website
    driver.get('https://www.redbubble.com/')
    # get the search by its id
    search_bar = driver.find_element_by_name("query")
    sleep(0.5)
    # enter the query to the search bar
    search_bar.send_keys(next_query)
    # press enter
    search_bar.send_keys(Keys.RETURN)
    sleep(1)
    # from parsel's selector get the page source
    sel1 = Selector(text=driver.page_source)
    sleep(0.5)
    # get first shirt
    continue_link = driver.find_element_by_class_name('shared-components-ShopSearchSkeleton-ShopSearchSkeleton__composedComponentWrapper--1s_CI').click()
    sleep(1)
    sel2 = Selector(text=driver.page_source)
    sleep(0.5)

    ################## get TAGS ###############
    # Check tags for all products
    try:
        # get the tags for the search query
        tags_rb = driver.find_element_by_class_name("shared-components-Tags-Tags__listContent--oLdDf").extract_first()
        tags_rb = str(tags_rb)
        # if number of products is found print it and search for the prime
        # print the number of products found
        if tags_rb == None:
            rb_results.append("0")
        else:
            #rb_results = str(tags_rb)
            rb_results.append(tags_rb)
    except ValueError:
        pass
    #except:
    #    rb_results.append("errore")

###### writing part ########
with open("rb_results.csv", "w", newline='') as resultFile:
    writer = csv.DictWriter(resultFile, fieldnames=["Rb Results"], delimiter='\t')
    writer.writeheader()
    writer.writerows({'Rb Results': item} for item in rb_results)
    resultFile.close()
Any ideas about how to fix it and extract the text of shared-components-Tags-Tags__listContent--oLdDf ? Many thanks!!!
If I understand correctly, you want the element's text. You can do it like this:
replace:
tags_rb = driver.find_element_by_class_name("shared-components-Tags-Tags__listContent--oLdDf").extract_first()
with:
tags_rb = driver.find_element_by_class_name("shared-components-Tags-Tags__listContent--oLdDf").text
You are getting this error:
'WebElement' object has no attribute 'extract_first'.
because WebElement does not have an .extract_first() method.
PS: you don't need this:
tags_rb = str(tags_rb)
The code block to replace is:
# Check tags for all products
try:
    # get the tags for the search query
    tags_rb = driver.find_element_by_class_name("shared-components-Tags-Tags__listContent--oLdDf").text  # get text
    # tags_rb = str(tags_rb)  # no need for this line
    # if number of products is found print it and search for the prime
    # print the number of products found
    if tags_rb == None:
        rb_results.append("0")
    else:
        #rb_results = str(tags_rb)
        rb_results.append(tags_rb)
except ValueError:
    pass
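As a side note, if you want to be robust to the element being missing (or appearing more than once), the plural find_elements_by_class_name from the same Selenium 3 API returns a list, which is empty when nothing matches, so no exception handling is needed. A rough sketch using the class name from the question:
# Sketch: collect the text of every matching tag container; the list is
# empty when the class is not present on the page.
tag_elements = driver.find_elements_by_class_name(
    "shared-components-Tags-Tags__listContent--oLdDf")
if tag_elements:
    rb_results.append(" | ".join(el.text for el in tag_elements))
else:
    rb_results.append("0")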

Python: Facebook Graph API - batch request

I want to make a batch request getting campaigns for a specific ad account. I wrote some simple code based on this issue,
but I used some global arrays and I don't know whether time.sleep(2) is necessary for this code. My code is below:
from facebookads import FacebookAdsApi
from facebookads.api import FacebookRequest
import pandas as pd
import time

batch_body_responses = []
list_of_artists = [1]

def success_callback(response):
    try:
        pair = [response.json()['data']]
        next = [response.json()['paging']['next']]
        batch_body_responses.append(pair)
        batch_body_responses.append(next)
    except IndexError:
        pass
    except UnicodeEncodeError:
        pass

def error_callback(response):
    pass

def generate_batches(iterable, batch_size_limit):
    # This function can be found in examples/batch_utils.py
    batch = []
    for item in iterable:
        if len(batch) == batch_size_limit:
            yield batch
            batch = []
        batch.append(item)
    if len(batch):
        yield batch

def get_id_list(art_search_list):
    batches = []
    your_app_id = '756885'
    your_app_secret = '123456789'
    your_access_token = 'EAA.....'
    api = FacebookAdsApi.init(your_app_id, your_app_secret, your_access_token)
    batch_limit = 25

    for batch in generate_batches(art_search_list, batch_limit):
        next_batch = api.new_batch()
        for artt in batch:
            requestss = [FacebookRequest(node_id='act_1234/campaigns', method="GET", endpoint="?fields=id,name")]
            for req in requestss:
                next_batch.add_request(req, success_callback, error_callback)
        batches.append(next_batch)

    for batch_request in batches:
        batch_request.execute()
        time.sleep(2)

    print(batch_body_responses)
    return batch_body_responses

df = pd.DataFrame(get_id_list(list_of_artists))
How can this code be optimized to avoid the global arrays, can it be executed without the sleep statement, and why is the sleep needed at all?
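No answer is recorded here, but as a rough sketch of how the globals could be avoided: the success callback can be a closure over a local list inside the function, so nothing module-level is needed. The sleep(2) is not required for correctness, since execute() is synchronous; it only spaces out the batch calls to reduce the chance of hitting rate limits, so it can be dropped or tuned. The sketch below reuses the same facebookads calls, placeholder credentials, node ID and generate_batches helper from the question; get_id_list_local is just an illustrative name.
def get_id_list_local(art_search_list):
    # Sketch only: responses collected in a local list via closures,
    # instead of the module-level batch_body_responses.
    responses = []

    def success_callback(response):
        try:
            responses.append(response.json()['data'])
        except (KeyError, IndexError):
            pass

    def error_callback(response):
        pass

    your_app_id = '756885'         # placeholders, as in the question
    your_app_secret = '123456789'
    your_access_token = 'EAA.....'
    api = FacebookAdsApi.init(your_app_id, your_app_secret, your_access_token)

    batches = []
    for batch in generate_batches(art_search_list, 25):
        next_batch = api.new_batch()
        for artt in batch:
            next_batch.add_request(
                FacebookRequest(node_id='act_1234/campaigns', method="GET",
                                endpoint="?fields=id,name"),
                success_callback, error_callback)
        batches.append(next_batch)

    for batch_request in batches:
        batch_request.execute()  # synchronous call; no sleep strictly needed

    return responses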

Unicode characters to Turkish characters

(Edit: my original question is posted here, but the issue has been resolved and the code below is correct.) I am looking for advice on how to convert Unicode escape sequences to Turkish characters. The following code (posted online) scrapes tweets for an individual user and outputs a CSV file, but the Turkish characters come out as Unicode escapes, e.g. \xc4. I am using Python 3 on a Mac.
import sys

default_encoding = 'utf-8'
if sys.getdefaultencoding() != default_encoding:
    reload(sys)
    sys.setdefaultencoding(default_encoding)

import tweepy  # https://github.com/tweepy/tweepy
import csv
import string

# Twitter API credentials
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""

def get_all_tweets(screen_name):
    # Twitter only allows access to a user's most recent 3240 tweets with this method

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # initialize a list to hold all the tweepy Tweets
    alltweets = []

    # make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # save most recent tweets
    alltweets.extend(new_tweets)

    # save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    # keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:
        # print "getting tweets before %s" % (oldest)

        # all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)

        # save most recent tweets
        alltweets.extend(new_tweets)

        # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

    # transform the tweepy tweets into a 2D array that will populate the csv
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text] for tweet in alltweets]

    # write the csv
    with open('%s_tweets.csv' % screen_name, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(outtweets)
    pass

if __name__ == '__main__':
    # pass in the username of the account you want to download
    get_all_tweets("")
The csv module docs recommend that you specify the encoding when you open the file (and also that you use newline='' so the csv module can do its own newline handling). Don't encode the Unicode strings yourself when writing rows.
import csv

with open('test.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['id', 'created_at', 'text'])
    writer.writerows([[123, 456, 'Äβç']])
