Running MPI python script in MPI azure ml pipeline - python-3.x

I'm trying to run distributed python job through azure ML pipelines using MPIStep pipeline class, by referring to the below example link - https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb
I tried implemented the same but even I change the node count parameter in MpiStep class, while running the script the it shows size (i.e comm.Get_size()) as 1 always. Can you please help me in what I'm missing here. Is there any specific setup required on the cluster?
Code snippets:
Pipeline code snippet:
model_dir = model_ds.path('./'+saved_model_blob+'/',data_reference_name='saved_model_path').as_mount()
label_dir = model_ds.path('./'+model_label_blob+'/',data_reference_name='model_label_blob').as_mount()
input_images = result_ds.path('./'+score_blob_name+'/',data_reference_name='Input_images').as_mount()
output_container = 'abc'
inti_container = 'xyz'
distributed_batch_score_step = MpiStep(
name="batch_scoring",
source_directory=SCRIPT_FOLDER,
script_name="batch_scoring_script_mpi.py",
arguments=["--dataset_path", input_images,
"--model_name", model_dir,
"--label_dir", label_dir,
"--intermediate_data_container", inti_container,
"--output_container", output_container],
compute_target=gpu_cluster,
inputs=[input_images, model_dir,label_dir],
pip_packages=["tensorflow","tensorflow-gpu==1.13.1","pillow","azure-keyvault","azure-storage-blob"],
conda_packages=["mesa-libgl-cos6-x86_64","mpi4py==3.0.2","opencv=3.4.2","scikit-learn=0.21.2"],
use_gpu=True,
allow_reuse = False,
node_count = nodecount_param,
process_count_per_node = 1
)
Python Script code snippet:
def run(input_dataset,comm):
rank = comm.Get_rank()
size = comm.Get_size()
print("Rank:" , rank)
print("Size:", size) # shows always 1, even the input node count is >1
print(MPI.Get_processor_name())
file_names = get_file_names(args.dataset_path)
sorted(file_names)
partition_size = len(file_names) // size
print("partition_size-->",partition_size)
partitioned_filenames = file_names[rank * partition_size: (rank + 1) * partition_size]
print("RANK {} - is processing {} images out of the total {}".format(rank, len(partitioned_filenames),
len(file_names)))
# call to Function 01
# call to Function 02
img_names = score_df['image_name'].unique()
output_batch = pd.DataFrame()
for i in img_names:
# call to Function 3
output_batch = output_batch.append(pp_output, ignore_index=True)
output_paths_list = comm.gather(output_batch, root=0)
print("RANK {} - number of pre-aggregated output files {}".format(rank, len(output_batch)))
print("saved in", currentDT + '\\' + 'data.csv')
if rank == 0:
print("RANK {} - number of aggregated output files {}".format(rank, len(output_paths_list)))
print("RANK {} - end".format(rank))
if __name__ == "__main__":
with tf.device('/GPU:0'):
init()
comm = MPI.COMM_WORLD
run(args.dataset_path,comm)

Got to know the issue is due to package version, earlier it is installed via conda with conda_packages=["mpi4py==3.0.2"], it worked after changing the install through pip - pip_packages=["mpi4py"]

Related

IndexError:: array index out of range

python3
def __init__(self):
super().__init__('object_tracking')
# Declare ROS parameters
self.declare_parameters(namespace='',
parameters=[('qos_length',0),
('topic.untracked_obj',''),
('topic.rgb_image',''),
('topic.tracked_obj',''),
('obj_class.id',[]),
('obj_class.name',[]),
('display',True),
('frame_id.tracked_obj','')])
self.nodeParams()
qos_length = self.get_parameter('qos_length').get_parameter_value().integer_value
qos_profile = QoSProfile(depth=qos_length,
history=QoSHistoryPolicy.KEEP_LAST,
reliability=QoSReliabilityPolicy.RELIABLE)
# Load cv_bridge
self.bridge = CvBridge()
# Create instance of SORT
self.mot_tracker = Sort()
# Create Subscribers
obj_topic = self.get_parameter('topic.untracked_obj').get_parameter_value().string_value
self.obj_sub = mf.Subscriber(self,ObjectArray,obj_topic,qos_profile=qos_profile)
rgb_topic = self.get_parameter('topic.rgb_image').get_parameter_value().string_value
self.rgb_sub = mf.Subscriber(self,Image,rgb_topic,qos_profile=qos_profile)
# Apply message filter
self.timestamp_sync = mf.TimeSynchronizer([self.obj_sub,self.rgb_sub],queue_size=qos_length)
self.timestamp_sync.registerCallback(self.objCallback)
# Create Publishers
obj_topic = self.get_parameter('topic.tracked_obj').get_parameter_value().string_value
self.obj_pub = self.create_publisher(ObjectArray,obj_topic,qos_profile)
def nodeParams(self):
#print('1')
self.display = self.get_parameter('display').get_parameter_value().bool_value
class_id = self.get_parameter('obj_class.id').get_parameter_value().integer_array_value
#print(class_id)
class_name = self.get_parameter('obj_class.name').get_parameter_value().integer_array_value
#print(class_name)
self.class_dict = {}
#for name in class_name:
'''#for i,id_ in enumerate(class_id):
#print('2')
#self.class_dict = class_name [name]
#print('3')'''
for i,id_ in enumerate(class_id):
self.class_dict[int(id_)] = class_name[i]
I'm not sure what's going on...I'd like to try object tracking in Carla 0.9.13 with ros2 foxy in Python 3.8. Could you please help me?
[object_tracking.py-3] self.nodeParams()
[object_tracking.py-3] File "/home/smit/ros2_ws/install/carla_simulation/lib/carla_simulation/object_tracking.py", line 64, in nodeParams
[object_tracking.py-3] self.class_dict[int(id_)] = class_name[i]
[object_tracking.py-3] IndexError: array index out of range
[ERROR] [object_tracking.py-3]: process has died [pid 623526, exit code 1, cmd '/home/smit/ros2_ws/install/carla_simulation/lib/carla_simulation/object_tracking.py --ros-args --params-file /home/smit/ros2_ws/install/carla_simulation/share/carla_simulation/config/params.yaml'].
You are pobably using the returned hierarchy variable wrong.
According to the specification:
In Python, hierarchy is nested inside a top level array. Use hierarchy[0][i] to access hierarchical elements of i-th contour.
https://docs.opencv.org/4.x/d3/dc0/group__imgproc__shape.html#gadf1ad6a0b82947fa1fe3c3d497f260e0

Linkedin web scraping snippet

I'm doing a web scraping data university research project. I started working on a ready GitHub project, but this project does not retrieve all the data.
The project works like this:
Search Google using keywords: example: (accountant 'email me at' Google)
Extract a snippet.
Retrieve data from this snippet.
The issue is:
The snippets extracted are like this: " ... marketing division in 2009. For more information on career opportunities with our company, email me: vicki#productivedentist.com. Neighborhood Smiles, LLCĀ ..."
The snippet does not show all, the "..." hides information like role, location... How can I retrieve all the information with the script?
from googleapiclient.discovery import build #For using Google Custom Search Engine API
import datetime as dt #Importing system date for the naming of the output file.
import sys
from xlwt import Workbook #For working on xls file.
import re #For email search using regex.
if __name__ == '__main__':
# Create an output file name in the format "srch_res_yyyyMMdd_hhmmss.xls in output folder"
now_sfx = dt.datetime.now().strftime('%Y%m%d_%H%M%S')
output_dir = './output/'
output_fname = output_dir + 'srch_res_' + now_sfx + '.xls'
search_term = sys.argv[1]
num_requests = int(sys.argv[2])
my_api_key = "replace_with_you_api_key" #Read readme.md to know how to get you api key.
my_cse_id = "011658049436509675749:gkuaxghjf5u" #Google CSE which searches possible LinkedIn profile according to query.
service = build("customsearch", "v1", developerKey=my_api_key)
wb=Workbook()
sheet1 = wb.add_sheet(search_term[0:15])
wb.save(output_fname)
sheet1.write(0,0,'Name')
sheet1.write(0,1,'Profile Link')
sheet1.write(0,2,'Snippet')
sheet1.write(0,3,'Present Organisation')
sheet1.write(0,4,'Location')
sheet1.write(0,5,'Role')
sheet1.write(0,6,'Email')
sheet1.col(0).width = 256 * 20
sheet1.col(1).width = 256 * 50
sheet1.col(2).width = 256 * 100
sheet1.col(3).width = 256 * 20
sheet1.col(4).width = 256 * 20
sheet1.col(5).width = 256 * 50
sheet1.col(6).width = 256 * 50
wb.save(output_fname)
row = 1 #To insert the data in the next row.
#Function to perform google search.
def google_search(search_term, cse_id, start_val, **kwargs):
res = service.cse().list(q=search_term, cx=cse_id, start=start_val, **kwargs).execute()
return res
for i in range(0, num_requests):
# This is the offset from the beginning to start getting the results from
start_val = 1 + (i * 10)
# Make an HTTP request object
results = google_search(search_term,
my_cse_id,
start_val,
num=10 #num value can be 1 to 10. It will give the no. of results.
)
for profile in range (0, 10):
snippet = results['items'][profile]['snippet']
myList = [item for item in snippet.split('\n')]
newSnippet = ' '.join(myList)
contain = re.search(r'[\w\.-]+#[\w\.-]+', newSnippet)
if contain is not None:
title = results['items'][profile]['title']
link = results['items'][profile]['link']
org = "-NA-"
location = "-NA-"
role = "-NA-"
if 'person' in results['items'][profile]['pagemap']:
if 'org' in results['items'][profile]['pagemap']['person'][0]:
org = results['items'][profile]['pagemap']['person'][0]['org']
if 'location' in results['items'][profile]['pagemap']['person'][0]:
location = results['items'][profile]['pagemap']['person'][0]['location']
if 'role' in results['items'][profile]['pagemap']['person'][0]:
role = results['items'][profile]['pagemap']['person'][0]['role']
print(title[:-23])
sheet1.write(row,0,title[:-23])
sheet1.write(row,1,link)
sheet1.write(row,2,newSnippet)
sheet1.write(row,3,org)
sheet1.write(row,4,location)
sheet1.write(row,5,role)
sheet1.write(row,6,contain[0])
print('Wrote {} search result(s)...'.format(row))
wb.save(output_fname)
row = row + 1
print('Output file "{}" written.'.format(output_fname))

Boto3/Lambda - Join multiple outputs from a loop and send in one email using AWS SNS

New to Python/Boto3, this should be an easy one but still learning :)
I have a Lambda function which creates a number of snapshots and works fine:
def create_snapshot():
volumes = ec2_client.describe_volumes(
Filters=[
{'N'...
...
for volume in volumes...
....
snap_name = 'Backup of ' + snap_desc
....
snap = ec2_client.create_snapshot(
VolumeId=vol_id,
Description=snap_desc
)
I then want to receive an email from AWS SNS to let me know which snapshots the function created, which I do using:
message = sns.publish(
TopicArn=SNSARN,
Subject=("Function executed"),
Message=("%s created" % snap_name)
)
The issue is that this creates an email for each snapshot, instead of one email listing all the snapshots. Should I create another function that calls all values produced by snap_desc, or can I send all values for snap_desc in the function? And most importantly what's the best way of doing this?
Cheers!
Scott
####################### UPDATE (Thanks #omuthu) #######################
I set an array inside and outside the loop, and put the string into the message. This produced the following being sent in one message:
The following snapshots have been created:
['vol-0e0b9a5dfb8379fc0 (Instance 1 - /dev/sda1)', 'vol-03aac6b65df64661e (Instance 4 - /dev/sda1)', 'vol-0fdde765dfg452631 (Instance 2 - /dev/sda1)', 'vol-0693a9568b11f625f (Instance 3 - /dev/sda1)', etc.
Okay got it sorted, finally!
def create_snapshot():
volumes = ec2_client.describe_volumes(
Filters=[
{'N'...
...
inst_list = []
for volume in volumes...
vol_id = volume['VolumeId']
....
snap_desc = vol_id
for name in volume['Tags']:
tag_key = name['Key']
tag_val = name['Value']
if tag_key == 'Name':
snap_desc = vol_id + ' (' + tag_val + ')'
....
....
....
if backup_mod is False or (current_hour + 10) % backup_mod != 0:
...
continue
else:
print("%s is scheduled this hour" % vol_id)
for name in volume['Tags']:
inst_tag_key = name['Key']
inst_tag_val = name['Value']
if inst_tag_key == 'Name':
inst_list.append(inst_tag_val)
snap = ec2_client.create_snapshot(
VolumeId=vol_id,
Description=snap_desc,
)
print("%s created" % snap['SnapshotId'])
msg = str("\n".join(inst_list))
if len(inst_list) != 0:
message = sns.publish(
TopicArn=SNSARN,
Subject=("Daily Lambda snapshot function complete"),
Message=("The following snapshots have been created today:\n\n" + msg + "\n")
)
print("Response: {}".format(message))

Assign Class attributes from list elements

I'm not sure if the title accurately describes what I'm trying to do. I have a Python3.x script that I wrote that will issue flood warning to my facebook page when the river near my home has reached it's lowest flood stage. Right now the script works, however it only reports data from one measuring station. I would like to be able to process the data from all of the stations in my county (total of 5), so I was thinking that maybe a class method may do the trick but I'm not sure how to implement it. I've been teaching myself Python since January and feel pretty comfortable with the language for the most part, and while I have a good idea of how to build a class object I'm not sure how my flow chart should look. Here is the code now:
#!/usr/bin/env python3
'''
Facebook Flood Warning Alert System - this script will post a notification to
to Facebook whenever the Sabine River # Hawkins reaches flood stage (22.3')
'''
import requests
import facebook
from lxml import html
graph = facebook.GraphAPI(access_token='My_Access_Token')
river_url = 'http://water.weather.gov/ahps2/river.php?wfo=SHV&wfoid=18715&riverid=203413&pt%5B%5D=147710&allpoints=143204%2C147710%2C141425%2C144668%2C141750%2C141658%2C141942%2C143491%2C144810%2C143165%2C145368&data%5B%5D=obs'
ref_url = 'http://water.weather.gov/ahps2/river.php?wfo=SHV&wfoid=18715&riverid=203413&pt%5B%5D=147710&allpoints=143204%2C147710%2C141425%2C144668%2C141750%2C141658%2C141942%2C143491%2C144810%2C143165%2C145368&data%5B%5D=all'
def checkflood():
r = requests.get(river_url)
tree = html.fromstring(r.content)
stage = ''.join(tree.xpath('//div[#class="stage_stage_flow"]//text()'))
warn = ''.join(tree.xpath('//div[#class="current_warns_statmnts_ads"]/text()'))
stage_l = stage.split()
level = float(stage_l[2])
#check if we're at flood level
if level < 22.5:
pass
elif level == 37:
major_diff = level - 23.0
major_r = ('The Sabine River near Hawkins, Tx has reached [Major Flood Stage]: #', stage_l[2], 'Ft. ', str(round(major_diff, 2)), ' Ft. \n Please click the link for more information.\n\n Current Warnings and Alerts:\n ', warn)
major_p = ''.join(major_r)
graph.put_object(parent_object='me', connection_name='feed', message = major_p, link = ref_url)
<--snip-->
checkflood()
Each station has different 5 different catagories for flood stage: Action, Flood, Moderate, Major, each different depths per station. So for Sabine river in Hawkins it will be Action - 22', Flood - 24', Moderate - 28', Major - 32'. For the other statinos those depths are different. So I know that I'll have to start out with something like:
class River:
def __init__(self, id, stage):
self.id = id #station ID
self.stage = stage #river level'
#staticmethod
def check_flood(stage):
if stage < 22.5:
pass
elif stage.....
but from there I'm not sure what to do. Where should it be added in(to?) the code, should I write a class to handle the Facebook postings as well, is this even something that needs a class method to handle, is there any way to clean this up for efficiency? I'm not looking for anyone to write this up for me, but some tips and pointers would sure be helpful. Thanks everyone!
EDIT Here is what I figured out and is working:
class River:
name = ""
stage = ""
action = ""
flood = ""
mod = ""
major = ""
warn = ""
def checkflood(self):
if float(self.stage) < float(self.action):
pass
elif float(self.stage) >= float(self.major):
<--snip-->
mineola = River()
mineola.name = stations[0]
mineola.stage = stages[0]
mineola.action = "13.5"
mineola.flood = "14.0"
mineola.mod = "18.0"
mineola.major = "21.0"
mineola.alert = warn[0]
hawkins = River()
hawkins.name = stations[1]
hawkins.stage = stages[1]
hawkins.action = "22.5"
hawkins.flood = "23.0"
hawkins.mod = "32.0"
hawkins.major = "37.0"
hawkins.alert = warn[1]
<--snip-->
So from here I'm tring to stick all the individual river blocks into one block. What I have tried so far is this:
class River:
... name = ""
... stage = ""
... def testcheck(self):
... return self.name, self.stage
...
>>> for n in range(num_river):
... stations[n] = River()
... stations[n].name = stations[n]
... stations[n].stage = stages[n]
...
>>> for n in range(num_river):
... stations[n].testcheck()
...
<__main__.River object at 0x7fbea469bc50> 4.13
<__main__.River object at 0x7fbea46b4748> 20.76
<__main__.River object at 0x7fbea46b4320> 22.13
<__main__.River object at 0x7fbea46b4898> 16.08
So this doesn't give me the printed results that I was expecting. How can I return the string instead of the object? Will I be able to define the Class variables in this manner or will I have to list them out individually? Thanks again!
After reading many, many, many articles and tutorials on class objects I was able to come up with a solution for creating the objects using list elements.
class River():
def __init__(self, river, stage, flood, action):
self.river = river
self.stage = stage
self.action = action
self.flood = flood
self.action = action
def alerts(self):
if float(self.stage < self.flood):
#alert = "The %s is below Flood Stage (%sFt) # %s Ft. \n" % (self.river, self.flood, self.stage)
pass
elif float(self.stage > self.flood):
alert = "The %s has reached Flood Stage(%sFt) # %sFt. Warnings: %s \n" % (self.river, self.flood, self.stage, self.action)
return alert
'''this is the function that I was trying to create
to build the class objects automagically'''
def riverlist():
river_list = []
for n in range(len(rivers)):
station = River(river[n], stages[n], floods[n], warns[n])
river_list.append(station)
return river_list
if __name__ == '__main__':
for x in riverlist():
print(x.alerts())

(WinApi) ChangeDisplaySettingsEx does not work

I'm trying to write a python script to switch the primary monitor.
I have 3 Monitors (one is plugged into my i5's graphics chip, and 2 are plugged into a ATI HD7870)
I wrote the following script:
import win32api as w
import win32con as c
i = 0
workingDevices = []
def setPrimary(id):
global workingDevices
return w.ChangeDisplaySettingsEx(
workingDevices[id].DeviceName,
w.EnumDisplaySettings(
workingDevices[id].DeviceName,
c.ENUM_CURRENT_SETTINGS
),
c.CDS_SET_PRIMARY | c.CDS_UPDATEREGISTRY | c.CDS_RESET) \
== c.DISP_CHANGE_SUCCESSFUL
while True:
try:
Device = w.EnumDisplayDevices(None, i, 1)
if Device.StateFlags & c.DISPLAY_DEVICE_ATTACHED_TO_DESKTOP: #Attached to desktop
workingDevices.append(Device)
i += 1
except:
break
print("Num Devices: ", len(workingDevices))
for dev in workingDevices:
print("Name: ", dev.DeviceName)
Invoking it leads to:
In [192]: %run test.py
Num Devices: 3
Name: \\.\DISPLAY1
Name: \\.\DISPLAY2
Name: \\.\DISPLAY7
In [193]: setPrimary(0)
Out[193]: True
In [194]: setPrimary(1)
Out[194]: True
In [195]: setPrimary(2)
Out[195]: True
So far it looks great, but the problem is: nothing changes. My monitors flicker shortly because of the CDS_RESET but the primary screen does not change, although ChangeDisplaySettingsEx returns DISP_CHANGE_SUCCESSFUL
Does anyone have an Idea why?
(I use Python 3.5.1 and PyWin32 build 220)
PS I use 1 as the third arg for EnumDisplayDevices because the msdn states it should be set to one, although the PyWin help says it should be set to 0.
But the behaviour of the script does not change independent of this value beeing one or zero
Ok, I found the solution.
Apperantly the primary monitor must always be at position (0, 0).
So when I tried to set another monitor to primary its position was set to (0, 0) which caused it to intersect with the old primary one.
It seems the way to go is to update the positions of all Monitors, and write those changes to the registry, and then once this is done apply the changes by calling ChangeDisplaySettingsEx() with default parameters.
This is my new (now working) code:
import win32api as w
import win32con as c
def load_device_list():
"""loads all Monitor which are plugged into the pc
The list is needed to use setPrimary
"""
workingDevices = []
i = 0
while True:
try:
Device = w.EnumDisplayDevices(None, i, 0)
if Device.StateFlags & c.DISPLAY_DEVICE_ATTACHED_TO_DESKTOP: #Attached to desktop
workingDevices.append(Device)
i += 1
except:
return workingDevices
def setPrimary(id, workingDevices, MonitorPositions):
"""
param id: index in the workingDevices list.
Designates which display should be the new primary one
param workingDevices: List of Monitors returned by load_device_list()
param MonitorPositions: dictionary of form {id: (x_position, y_position)}
specifies the monitor positions
"""
FlagForPrimary = c.CDS_SET_PRIMARY | c.CDS_UPDATEREGISTRY | c.CDS_NORESET
FlagForSec = c.CDS_UPDATEREGISTRY | c.CDS_NORESET
offset_X = - MonitorPositions[id][0]
offset_Y = - MonitorPositions[id][1]
numDevs = len(workingDevices)
#get devmodes, correct positions, and update registry
for i in range(numDevs):
devmode = w.EnumDisplaySettings(workingDevices[i].DeviceName, c.ENUM_CURRENT_SETTINGS)
devmode.Position_x = MonitorPositions[i][0] + offset_X
devmode.Position_y = MonitorPositions[i][1] + offset_Y
if(w.ChangeDisplaySettingsEx(workingDevices[i].DeviceName, devmode,
FlagForSec if i != id else FlagForPrimary) \
!= c.DISP_CHANGE_SUCCESSFUL): return False
#apply Registry updates once all settings are complete
return w.ChangeDisplaySettingsEx() == c.DISP_CHANGE_SUCCESSFUL;
if(__name__ == "__main__"):
devices = load_device_list()
for dev in devices:
print("Name: ", dev.DeviceName)
MonitorPositions = {
0: (0, -1080),
1: (0, 0),
2: (1920, 0)
}
setPrimary(0, devices, MonitorPositions)

Resources