Export data from a file into Excel Python 2.7 - excel

I would need to create an Excel sheet out of a file. The file has this format, but has dozens of templates like this one. To match the representation of my entire file, the following format can be copy pasted twice:
Field Value
OS-DCF:diskConfig MANUAL
OS-EXT-AZ:availability_zone az1
OS-EXT-STS:power_state 1
OS-EXT-STS:task_state None
OS-EXT-STS:vm_state active
OS-SRV-USG:launched_at 2016-02-04T12:53:35.000000
OS-SRV-USG:terminated_at None
accessIPv4
accessIPv6
addresses 10.10.10.10
config_drive True
created 2018-09-04T12:52:52Z
flavor m1.small
hostId ajsdajdad-qweqweqw-qwe123123-qweqweq-sadsadasd-1121212
id 922adwq-qwejjqeq-123123-asdasa
image cirros1
key_name None
name vm1
os-extended-volumes:volumes_attached []
progress 0
project_id id
properties ctrl='10.10.10.3', token='token', tenant='tenant1'
scheduler_hints {}
security_groups [{u'name': u'sg1'}, {u'name': u'sg2'}]
status ACTIVE
updated 2016-02-04T12:53:35Z
user_id user1
The Excel file should look like this:
The problematic part seems to be here:
hostname, chains_segment = host.split('\n',1)
hostname = hostname.strip()
After the strip, I should get the VM name value, but I am getting empty values.
Here is the full code:
import xlsxwriter
import argparse
parser=argparse.ArgumentParser(description="Script")
parser.add_argument('-i','--input',help='Input log file name',required=True)
parser.add_argument('-o','--output',help='Desired name for the Excel file',required=True)
parser.add_argument('-s','--sheet',help='Desired name of the Excel sheet(Default: Sheet1)',default='Sheet1',required=False)
args=parser.parse_args()
az='| OS-EXT-AZ:availability_zone | '
state='| OS-EXT-STS:vm_state | '
launch='| OS-SRV-USG:launched_at | '
ipaddr='| addresses | '
flavor='| flavor | '
image='| image | '
def create_chain(chain_segment):
chains=[]
chain_lines = [line for line in chain_segment.split('\n') if line]
for line in chain_lines:
chain={}
if launch in line:
chain['launch'] = line.split()[3]
# chain['az'] = line.split()[3]
# chain['state'] = line.split()[3]
# chain['ipaddr'] = line.split()[3]
# chain['flavor'] = line.split()[3]
# chain['image'] = line.split()[3]
chains.append(chain)
chains=filter(None, chains)
chains=list(chains)
chained = [merge_dicts(chains[0], i) for i in chains[1:]]
return chained
def merge_dicts(x,y):
z=x.copy()
z.update(y)
return z
with open(args.input) as f:
log_content = f.read()
host_sections = [host for host in log_content.split(" Field Value") if host]
hosts = {}
for host in host_sections:
hostname, chains_segment = host.split('\n',1)
hostname = hostname.strip()
chains=[]
for segment in chains_segment.split('\n\n'):
chains.extend(create_chain(segment))
hosts[hostname] = chains
workbook=xlsxwriter.Workbook(args.output)
worksheet1=workbook.add_worksheet(args.sheet)
worksheet1.write(0,0,'VM')
worksheet1.write(0,1,'Availability Zone')
worksheet1.write(0,2,'State')
worksheet1.write(0,3,'Launched at')
worksheet1.write(0,4,'IP Address')
worksheet1.write(0,5,'Flavor')
worksheet1.write(0,6,'Image')
worksheet1.write(0,7,'Tenant')
worksheet1.write(0,8,'Security Group')
row = 1
for host, chains in hosts.items():
for chain in chains:
worksheet1.write(row, 0, host)
worksheet1.write(row, 1, chain.get('az'))
worksheet1.write(row, 2, chain.get('state'))
worksheet1.write(row, 3, chain.get('launch'))
worksheet1.write(row, 4, chain.get('ipaddr'))
worksheet1.write(row, 5, chain.get('flavor'))
worksheet1.write(row, 6, chain.get('image'))
row += 1
workbook.close()
Any idea how I can correct this?
Many thanks,
Albert
P.S. Please note that I am new to programming.

Related

IndexError:: array index out of range

python3
def __init__(self):
super().__init__('object_tracking')
# Declare ROS parameters
self.declare_parameters(namespace='',
parameters=[('qos_length',0),
('topic.untracked_obj',''),
('topic.rgb_image',''),
('topic.tracked_obj',''),
('obj_class.id',[]),
('obj_class.name',[]),
('display',True),
('frame_id.tracked_obj','')])
self.nodeParams()
qos_length = self.get_parameter('qos_length').get_parameter_value().integer_value
qos_profile = QoSProfile(depth=qos_length,
history=QoSHistoryPolicy.KEEP_LAST,
reliability=QoSReliabilityPolicy.RELIABLE)
# Load cv_bridge
self.bridge = CvBridge()
# Create instance of SORT
self.mot_tracker = Sort()
# Create Subscribers
obj_topic = self.get_parameter('topic.untracked_obj').get_parameter_value().string_value
self.obj_sub = mf.Subscriber(self,ObjectArray,obj_topic,qos_profile=qos_profile)
rgb_topic = self.get_parameter('topic.rgb_image').get_parameter_value().string_value
self.rgb_sub = mf.Subscriber(self,Image,rgb_topic,qos_profile=qos_profile)
# Apply message filter
self.timestamp_sync = mf.TimeSynchronizer([self.obj_sub,self.rgb_sub],queue_size=qos_length)
self.timestamp_sync.registerCallback(self.objCallback)
# Create Publishers
obj_topic = self.get_parameter('topic.tracked_obj').get_parameter_value().string_value
self.obj_pub = self.create_publisher(ObjectArray,obj_topic,qos_profile)
def nodeParams(self):
#print('1')
self.display = self.get_parameter('display').get_parameter_value().bool_value
class_id = self.get_parameter('obj_class.id').get_parameter_value().integer_array_value
#print(class_id)
class_name = self.get_parameter('obj_class.name').get_parameter_value().integer_array_value
#print(class_name)
self.class_dict = {}
#for name in class_name:
'''#for i,id_ in enumerate(class_id):
#print('2')
#self.class_dict = class_name [name]
#print('3')'''
for i,id_ in enumerate(class_id):
self.class_dict[int(id_)] = class_name[i]
I'm not sure what's going on...I'd like to try object tracking in Carla 0.9.13 with ros2 foxy in Python 3.8. Could you please help me?
[object_tracking.py-3] self.nodeParams()
[object_tracking.py-3] File "/home/smit/ros2_ws/install/carla_simulation/lib/carla_simulation/object_tracking.py", line 64, in nodeParams
[object_tracking.py-3] self.class_dict[int(id_)] = class_name[i]
[object_tracking.py-3] IndexError: array index out of range
[ERROR] [object_tracking.py-3]: process has died [pid 623526, exit code 1, cmd '/home/smit/ros2_ws/install/carla_simulation/lib/carla_simulation/object_tracking.py --ros-args --params-file /home/smit/ros2_ws/install/carla_simulation/share/carla_simulation/config/params.yaml'].
You are pobably using the returned hierarchy variable wrong.
According to the specification:
In Python, hierarchy is nested inside a top level array. Use hierarchy[0][i] to access hierarchical elements of i-th contour.
https://docs.opencv.org/4.x/d3/dc0/group__imgproc__shape.html#gadf1ad6a0b82947fa1fe3c3d497f260e0

Extract second IP from lines

Is there a way to extract the second IP address from a command-line output?
Command output
Manual NAT Policies (Section 1)
60 (sdf-app-vip) to (outside) source dynamic d-d-servers interface destination static obj-15.34.4.32 obj-159.13.9.12
translate_hits = 0, untranslate_hits = 0
61 (ds-app-vip) to (outside) source dynamic d-d-servers interface destination static obj-15.1.95.176 obj-15.13.5.176
translate_hits = 0, untranslate_hits = 0
152 (sd-app-vip) to (outside) source dynamic d-d-servers interface destination static obj-19.36.11.12 obj-19.36.15.12
translate_hits = 0, untranslate_hits = 0
Auto NAT Policies (Section 2)
115 (nk-app-vip) to (customer-vrf-sd) source static nat-10.19.2.190-customer-vrf-transit 10.223.2.2
translate_hits = 0, untranslate_hits = 4652
My code is able to extract both IP, but am not able to filter the second IP.
Code:
import re
#Truncate file
ft=open('puip_only.txt','w')
ft.truncate()
ft.close()
#Filter IP's from object group IP output
cip=open('puip.txt', 'r')
cs=cip.readlines()
for line in cs:
matches= re.findall(r'[0-9]+(?:\.[0-9]+){3}', line)
newlines=( ' '.join(matches))
outF = open("puip_only.txt", "a")
outF.write(newlines)
outF.write("\n")
outF.close()
Expected output is
159.13.9.12
15.13.5.176
19.36.15.12
10.223.2.2
If you only want the second IP, don't join it with the first:
if len(matches)>=2:
outF.write(matches[1])
instead of
outF.write(newlines)
You can match the following space, word chars and - and then capture the second ip number in group 1.
The group 1 values will be returned by re.findall.
\b[0-9]+(?:\.[0-9]+){3}\s+\w+-([0-9]+(?:\.[0-9]+){3})\b
Regex demo

Linkedin web scraping snippet

I'm doing a web scraping data university research project. I started working on a ready GitHub project, but this project does not retrieve all the data.
The project works like this:
Search Google using keywords: example: (accountant 'email me at' Google)
Extract a snippet.
Retrieve data from this snippet.
The issue is:
The snippets extracted are like this: " ... marketing division in 2009. For more information on career opportunities with our company, email me: vicki#productivedentist.com. Neighborhood Smiles, LLCĀ ..."
The snippet does not show all, the "..." hides information like role, location... How can I retrieve all the information with the script?
from googleapiclient.discovery import build #For using Google Custom Search Engine API
import datetime as dt #Importing system date for the naming of the output file.
import sys
from xlwt import Workbook #For working on xls file.
import re #For email search using regex.
if __name__ == '__main__':
# Create an output file name in the format "srch_res_yyyyMMdd_hhmmss.xls in output folder"
now_sfx = dt.datetime.now().strftime('%Y%m%d_%H%M%S')
output_dir = './output/'
output_fname = output_dir + 'srch_res_' + now_sfx + '.xls'
search_term = sys.argv[1]
num_requests = int(sys.argv[2])
my_api_key = "replace_with_you_api_key" #Read readme.md to know how to get you api key.
my_cse_id = "011658049436509675749:gkuaxghjf5u" #Google CSE which searches possible LinkedIn profile according to query.
service = build("customsearch", "v1", developerKey=my_api_key)
wb=Workbook()
sheet1 = wb.add_sheet(search_term[0:15])
wb.save(output_fname)
sheet1.write(0,0,'Name')
sheet1.write(0,1,'Profile Link')
sheet1.write(0,2,'Snippet')
sheet1.write(0,3,'Present Organisation')
sheet1.write(0,4,'Location')
sheet1.write(0,5,'Role')
sheet1.write(0,6,'Email')
sheet1.col(0).width = 256 * 20
sheet1.col(1).width = 256 * 50
sheet1.col(2).width = 256 * 100
sheet1.col(3).width = 256 * 20
sheet1.col(4).width = 256 * 20
sheet1.col(5).width = 256 * 50
sheet1.col(6).width = 256 * 50
wb.save(output_fname)
row = 1 #To insert the data in the next row.
#Function to perform google search.
def google_search(search_term, cse_id, start_val, **kwargs):
res = service.cse().list(q=search_term, cx=cse_id, start=start_val, **kwargs).execute()
return res
for i in range(0, num_requests):
# This is the offset from the beginning to start getting the results from
start_val = 1 + (i * 10)
# Make an HTTP request object
results = google_search(search_term,
my_cse_id,
start_val,
num=10 #num value can be 1 to 10. It will give the no. of results.
)
for profile in range (0, 10):
snippet = results['items'][profile]['snippet']
myList = [item for item in snippet.split('\n')]
newSnippet = ' '.join(myList)
contain = re.search(r'[\w\.-]+#[\w\.-]+', newSnippet)
if contain is not None:
title = results['items'][profile]['title']
link = results['items'][profile]['link']
org = "-NA-"
location = "-NA-"
role = "-NA-"
if 'person' in results['items'][profile]['pagemap']:
if 'org' in results['items'][profile]['pagemap']['person'][0]:
org = results['items'][profile]['pagemap']['person'][0]['org']
if 'location' in results['items'][profile]['pagemap']['person'][0]:
location = results['items'][profile]['pagemap']['person'][0]['location']
if 'role' in results['items'][profile]['pagemap']['person'][0]:
role = results['items'][profile]['pagemap']['person'][0]['role']
print(title[:-23])
sheet1.write(row,0,title[:-23])
sheet1.write(row,1,link)
sheet1.write(row,2,newSnippet)
sheet1.write(row,3,org)
sheet1.write(row,4,location)
sheet1.write(row,5,role)
sheet1.write(row,6,contain[0])
print('Wrote {} search result(s)...'.format(row))
wb.save(output_fname)
row = row + 1
print('Output file "{}" written.'.format(output_fname))

Trying to convert a file that lists a single IP address to that addresses class C network range

Here's the function i created:
def convert(inc_ip):
ip_address = inc_ip
ip_parts = ip_address.split('.')
ip_copy = ip_parts.copy()
ip_list = [ip_parts[0], ip_parts[1], ip_parts[2], ip_parts[3]]
ip_list[3] = '1'
ip_list_copy = [ip_copy[0], ip_copy[1], ip_copy[2], ip_copy[3]]
ip_list_copy[3] = '254'
join = '.'
ip_join = join.join(ip_list)
ip_join_copy = join.join(ip_list_copy)
ip_range = (str(ip_join) + '-' + str(ip_join_copy))
print("-" * 30, 'ip_adddress', "-" * 27)
print(ip_address)
print("-" * 30, 'ip_range', "-" * 30)
print(ip_range)
And here's what i've written to try and achieve the goal:
with open('ip address.txt', 'r') as rf:
lines = rf.read().split('/n')
for line in lines:
inc_ip = line
convert(inc_ip)
print(lines)
The output is a step in the right direction but isn't quite there:
------------------------------ ip_range ------------------------------
10.0.0.1-10.0.0.254
['10.0.0.0\n10.0.12.0
I'm trying to convert all the ip addresses listed (about 300) in either the output of pycharm or a saved to a text file
How would i go about fixing this?
I think you need to replace "/n" to "\n", that the reason why you split is not working.
So like this :
with open('ip address.txt', 'r') as rf:
lines = rf.read().split('/n')
for line in lines:
inc_ip = line
convert(inc_ip)
print(lines)
This is giving me this output :
------------------------------ ip_adddress ---------------------------
10.0.0.0
------------------------------ ip_range ------------------------------
10.0.0.1-10.0.0.254
------------------------------ ip_adddress ---------------------------
10.0.12.0
------------------------------ ip_range ------------------------------
10.0.12.1-10.0.12.254
['10.0.0.0', '10.0.12.0']
Note : I suggest for future post to explain more what you are trying to achieved, and when you are using a file such as ip address.txt, that you post what it contains to help user to reply

Using python configparser to update .ini file section value but cant figure how to update the correct value as directed

I have been tasked with creating a program that will update a value in an ini file. The ini file looks like;
[link1]
name = nodeB
ip = 127.0.0.1
port = 1002
cost = 190
[link2]
name = nodeC
ip = 127.0.0.1
port = 1003
cost = 210
The command to update this ini file can only take two parameters, neighbor name and cost. I cant figure out how to update the values without saying which section the neighbor is in. The example of the parameters in use is UpdateRouteCost nodeB 4.
I am able to update a value by stating the section and the key I want updated.
elif text == "UpdateRouteCost":
parser = configparser.ConfigParser()
parser.read('try.ini')
t = open('try.ini', 'w')
parser.set('link1', 'cost', '1')
parser.write(t)
t.close()
print(parser.get('link1', 'cost'))

Resources