I am attempting to insert GRErouting layer in between GRE and IP using Scapy. The pcap I am reading contains a single packet stacked as follows: Ethernet/IPv4/GRE/IPv4/ICMP.
What I see is that getLayer returns the current layer + its payload, which may include other layers, and that's not good for me. I would like to only get the current layer. When I do getLayer for each layer, and then write the entire array I get a strange pcap because of the additional payload that each layer has over it.
I am also not able to use a simple 'print' to output any data to console. I understand this is because Scapy adds the logging module, and suppresses system logging, but I'd like to know how to undo that and be able to use the 'print' statement.
import os
import sys
import logging
logging.basicConfig()
logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
from scapy.all import PcapReader, PcapWriter, fuzz, Packet
from scapy.layers.l2 import GRE, GRErouting
from scapy.layers.inet import IP
logging.getLogger("scapy.runtime").setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
def foo(in_filename, out_filename):
    """Copy packets between pcap files, queueing a GRErouting layer after GRE.

    Every packet is walked with ``getlayer(n)``. The checksum of each IP and
    GRE layer is deleted, and for each GRE layer ``routing_present`` is set
    and a fuzzed ``GRErouting`` layer is appended to the list that is written.

    NOTE(review): ``getlayer(n)`` yields layer *n* still carrying its payload,
    so the list written per packet holds overlapping data. That is the
    behaviour the accompanying question asks about and is kept unchanged.

    :param in_filename: path of the capture file to read.
    :param out_filename: path of the capture file to write.
    """
    log = logging.getLogger()
    f = PcapReader(in_filename)
    try:
        o = PcapWriter(out_filename)
        try:
            p = f.read_packet()
            while p:
                layers = []
                counter = 0
                layer = p.getlayer(counter)
                while layer is not None:
                    layers.append(layer)
                    if type(layer) is IP:
                        del layer.chksum
                    if type(layer) is GRE:
                        log.debug("there is a GRE layer")
                        layer.routing_present = 1
                        # fuzz() hands back the fuzzed packet; the original
                        # discarded it and appended the un-fuzzed layer.
                        gr = fuzz(GRErouting())
                        layers.append(gr)
                        del layer.chksum
                    counter += 1
                    layer = p.getlayer(counter)
                log.debug("Layers are: %s\t\t", layers)
                for l in layers:
                    log.debug("%s", l)
                o.write(layers)
                p = f.read_packet()
        finally:
            # Close the writer even when reading or dissecting fails.
            o.close()
    finally:
        f.close()
if __name__ == "__main__":
    # Entry point: expects exactly an input path and an output path.
    logging.getLogger().debug('Executing main')
    if len(sys.argv) != 3:
        sys.stderr.write("USAGE: {0} <path to input file> <path to output file>\n".format(str(sys.argv[0])))
    else:
        in_filename = str(sys.argv[1])
        out_filename = str(sys.argv[2])
        if os.path.exists(in_filename):
            foo(in_filename, out_filename)
        else:
            sys.stderr.write("Either {0} does not exist, or you do not have proper permissions\n".format(in_filename))
I was finally able to answer my own two questions. See modified code below:
# importing the os package (see api at http://docs.python.org/2.6/library/io.html)
import os
# import function 'basename' from module os.path
from os.path import basename
# importing the sys package (see api at http://docs.python.org/2.6/library/sys.html)
import sys
# importing the logging package (see api at http://docs.python.org/2.6/library/logging.html)
import logging
# by default Scapy attempts to find ipv6 routing information,
# and if it does not find any it prints out a warning when running the module.
# the following statement changes log level to ERROR so that this warning will not
# occur
effective_level = logging.getLogger("scapy.runtime").getEffectiveLevel()
logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
# importing Scapy
from scapy.all import PcapReader, PcapWriter
from scapy.layers.l2 import GRE, GRErouting, NoPayload
# restore the log level to what it was before the import
logging.getLogger("scapy.runtime").setLevel(effective_level)
# unfortunately, somewhere in Scapy sys.stdout is being reset.
# thus, using the 'print' command will not produce output to the console.
# the following two lines place stdout back into sys.
if sys.stdout != sys.__stdout__:
sys.stdout = sys.__stdout__
# this is a function declaration. there is no need for explicit types.
# python can infer an object type from its usage
def foo(in_filename, out_filename):
    """Rewrite a pcap, inserting GRErouting entries directly after each GRE layer.

    For every packet the layer chain is walked from ``firstlayer()`` until a
    ``NoPayload`` is reached. ``chksum`` and ``len`` are deleted on every layer
    that declares them. On a GRE layer the routing and checksum flags are set
    and two ``GRErouting`` layers (one with 4 bytes of routing info, one
    NULL entry) are spliced in between GRE and its former payload.

    :param in_filename: path of the capture file to read.
    :param out_filename: path of the capture file to write.
    """
    # open the input file for reading
    f = PcapReader(in_filename)
    try:
        # open the output file for writing
        o = PcapWriter(out_filename)
        try:
            # read the first packet from the input file
            p = f.read_packet()
            # while we haven't processed the last packet
            while p:
                # gets the first layer of the current packet
                layer = p.firstlayer()
                # loop over the layers
                while not isinstance(layer, NoPayload):
                    # `in` replaces dict.has_key(), which Python 3 removed
                    if 'chksum' in layer.default_fields:
                        del layer.chksum
                    if 'len' in layer.default_fields:
                        del layer.len
                    if type(layer) is GRE:
                        layer.routing_present = 1
                        layer.chksum_present = 1
                        # make sure to delete the checksum field. hopefully
                        # scapy will calculate it correctly one day
                        del layer.chksum
                        gr = GRErouting()
                        gr.address_family = 0x0800
                        gr.SRE_len = 4
                        gr.SRE_offset = 0
                        gr.routing_info = "1111"
                        # the NULL routing field
                        empty_gr = GRErouting()
                        empty_gr.address_family = 0x0000
                        empty_gr.SRE_len = 0
                        gr.add_payload(empty_gr)
                        gr.add_payload(layer.payload)
                        layer.remove_payload()
                        layer.add_payload(gr)
                        # continue the walk from the NULL entry so the two
                        # inserted routing layers are not processed again
                        layer = empty_gr
                    # advance to the next layer
                    layer = layer.payload
                # write the packet we just modified into the output file
                o.write(p)
                # read the next packet
                p = f.read_packet()
        finally:
            # close the output file, even when processing fails
            o.close()
    finally:
        # close the input file
        f.close()
# this block runs only when the module is executed as the main module;
# it is not executed when the module is imported from another module
if __name__ == "__main__":
    # sys.argv[0] includes the path to the module; keep only the module name.
    script_name = basename(str(sys.argv[0]))
    # letting the user know we are starting. A single parenthesised argument
    # prints the same text under Python 2 and Python 3 (the original used the
    # Python-2-only print statement).
    print('===> Running ' + script_name)
    # making sure that two parameters were entered on the command line
    if len(sys.argv) == 3:
        # get the path to the input file
        in_filename = str(sys.argv[1])
        # get the path to the output file
        out_filename = str(sys.argv[2])
        # make sure the input file actually exists.
        # if it doesn't, we print out an error and exit
        if not os.path.exists(in_filename):
            sys.stderr.write("Either {0} does not exist, or you do not have proper permissions\n".format(in_filename))
        else:
            # if the input file does exist, execute 'foo'
            foo(in_filename, out_filename)
            # print an end script notification
            print(script_name + ' ===> completed successfully')
    else:
        # write a USAGE message to the standard error stream
        sys.stderr.write("USAGE: {0} <path to input file> <path to output file>\n".format(script_name))
Related
Update: turns out od.download() returns None by design.
What might be better than a None check for od.download() "failure"?
I am downloading a .zip file using opendatasets lib.
In iris_scans(); line print(download), without the if-statement prints None.
However, at invocation scans = iris_scans() data is returned and subsequent prints can display data successfully.
The purpose of the if-statement is for "Graceful error handling".
Note: I've used an if-statement instead of try-except as there are many possibilities why download == None (e.g. dead link, connection interrupt etc.)
pip3 install opendatasets
import opendatasets as od
import zipfile
import os
import shutil
from PIL import Image
import numpy as np
def iris_scans():
    """Download the iris database and return the scans plus augmented copies.

    The archive is downloaded with ``od.download`` (which returns ``None`` by
    design, so its result is not inspected), extracted, and every file except
    ``readme.txt`` is loaded as a NumPy array. The extracted directory and the
    archive are removed afterwards.

    Augmentation appends, for each scan, its 90/180/270 degree rotations and
    the 90/180/270 degree rotations of its left-right mirror image, so the
    result holds seven arrays per source image.

    :return: list of ``numpy.ndarray`` objects.
    """
    od.download('http://www.mae.cuhk.edu.hk/~cvl/iris_database/iris_database.zip')
    path_extract = 'iris_database/'
    archive = path_extract[:-1] + '.zip'
    with zipfile.ZipFile(archive, 'r') as zip_ref:
        zip_ref.extractall(path_extract)
    scans = []
    try:
        os.remove(path_extract + 'readme.txt')
        # Sorted so the order of the returned scans is reproducible.
        for f in sorted(os.listdir(path_extract)):
            # Close each image file before the directory is deleted below.
            with Image.open(path_extract + f) as img:
                scans.append(np.array(img))
    finally:
        shutil.rmtree(path_extract)
        os.remove(archive)
    # Data Augmentation
    scans_90 = [np.rot90(s) for s in scans]
    scans_180 = [np.rot90(s) for s in scans_90]
    scans_270 = [np.rot90(s) for s in scans_180]
    # np.flip() without an axis reverses every axis, which on a 2-D image is
    # just a 180 degree rotation and duplicates the variants above; mirror
    # left-right instead.
    scans_flip = [np.fliplr(s) for s in scans]
    scans_flip_90 = [np.rot90(s) for s in scans_flip]
    scans_flip_180 = [np.rot90(s) for s in scans_flip_90]
    scans_flip_270 = [np.rot90(s) for s in scans_flip_180]
    for variant in (scans_90, scans_180, scans_270,
                    scans_flip_90, scans_flip_180, scans_flip_270):
        scans += variant
    return scans
# Load (and augment) the scans, then show the first array and the total count.
scans = iris_scans()
print(scans[0])
print(len(scans))
The original question was a road-block on the path of implementing some form of Exception Handling for the download.
od.download() == None by design; so an alternative to if download == None needed to be made.
As pointed out and assisted by @Henry, the try-except below covers all the exceptions found in the GitHub source.
...
import urllib
def iris_scans():
try:
download = od.download('http://www.dgcdgyugcwyugyugcasc.com/wqduiuwqdwq') # BROKEN
...
return scans
except (urllib.error.URLError, IOError, RuntimeError) as e:
print('Iris Scans - failed')
return [[]*1778]
Iris Scans - failed
[]
1
Top answer to this post demos many exceptions on one line.
I'm working on a bot for a competition that receives its input through sys.stdin and uses Python's print() for output. I have the following:
import sys
def main():
while True:
line = sys.stdin.readline()
parts = line.split()
if len(parts) > 0:
# do stuff
The problem is that the input comes in through a stream and using the above, blocks me from printing anything back until the stream is closed. What can I do to make this work?
By turning blocking off you can only read a character at a time. So, there is no way to get readline() to work in a non-blocking context. I assume you just want to read key presses to control the robot.
I have had no luck using select.select() on Linux and created a way with tweaking termios settings. So, this is Linux specific but works for me:
import atexit, termios
import sys, os
import time
old_settings=None
def init_any_key():
    """Save the current terminal settings of stdin and switch it to polling mode.

    The saved settings go into the module-level ``old_settings``. Echo and
    canonical (line) mode are switched off, and VMIN/VTIME are set to 0.
    """
    global old_settings
    stdin = sys.stdin
    old_settings = termios.tcgetattr(stdin)
    attrs = termios.tcgetattr(stdin)
    lflag_index, cc_index = 3, 6
    attrs[lflag_index] &= ~(termios.ECHO | termios.ICANON)  # lflags
    control_chars = attrs[cc_index]  # cc
    control_chars[termios.VMIN] = 0
    control_chars[termios.VTIME] = 0
    termios.tcsetattr(stdin, termios.TCSADRAIN, attrs)
# Registered with atexit so the terminal is put back when the interpreter
# exits (the decorator had been reduced to a comment, which left the terminal
# without echo and line editing after the script ended).
@atexit.register
def term_any_key():
    """Restore the terminal settings saved in ``old_settings``, if any."""
    if old_settings:
        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
def any_key():
    """Return the byte values currently readable from stdin, without waiting.

    Bytes are read one at a time with ``os.read`` until a read returns
    nothing.

    :return: list of integer byte values; empty when no key is pending.
    """
    fd = sys.stdin.fileno()
    ch_set = []
    ch = os.read(fd, 1)
    while ch:
        # bytearray() yields ints for both Python 2 str and Python 3 bytes;
        # ord(ch[0]) raises TypeError on Python 3, where ch[0] is an int.
        ch_set.extend(bytearray(ch))
        ch = os.read(fd, 1)
    return ch_set
# Poll the keyboard: print whatever byte values arrived, otherwise sleep briefly.
init_any_key()
while True:
    key = any_key()
    # any_key() always returns a list, so test for emptiness; the original
    # `is not None` check was always true and the loop never slept.
    if key:
        print(key)
    else:
        time.sleep(0.1)
A better Windows or cross-platform answer is here: Non-blocking console input?
You can use selectors for handle I/O multiplexing:
https://docs.python.org/3/library/selectors.html
Try this out:
#! /usr/bin/python3
import sys
import fcntl
import os
import selectors
# set sys.stdin non-blocking
# Add O_NONBLOCK to the file status flags of stdin.
orig_fl = fcntl.fcntl(sys.stdin, fcntl.F_GETFL)
fcntl.fcntl(sys.stdin, fcntl.F_SETFL, orig_fl | os.O_NONBLOCK)


def got_keyboard_data(stdin):
    """Callback for the selector: read what is available and echo it."""
    typed = stdin.read()
    print('Keyboard input: {}'.format(typed))


# Register stdin for read events, with the callback stored as the key's data.
m_selector = selectors.DefaultSelector()
m_selector.register(sys.stdin, selectors.EVENT_READ, got_keyboard_data)

while True:
    sys.stdout.write('Type something and hit enter: ')
    sys.stdout.flush()
    # select() waits for a registered event and returns (key, mask) pairs.
    ready = m_selector.select()
    for k, mask in ready:
        callback = k.data
        callback(k.fileobj)
The above code will hold on the line
for k, mask in m_selector.select():
until a registered event occurs, returning a selector_key instance (k) and a mask of monitored events.
In the above example we registered only one event (Enter key press):
m_selector.register(sys.stdin, selectors.EVENT_READ, got_keyboard_data)
The selector key instance is defined as follows:
abstractmethod register(fileobj, events, data=None)
Therefore, the register method sets k.data as our callback function got_keyboard_data, and calls it when the Enter key is pressed:
callback = k.data
callback(k.fileobj)
A more complete example (and hopefully more useful) would be to multiplex stdin data from user with incoming connections from network:
import selectors
import socket
import sys
import os
import fcntl
m_selector = selectors.DefaultSelector()
# set sys.stdin non-blocking
def set_input_nonblocking():
    """Add O_NONBLOCK to the file status flags of sys.stdin."""
    current_flags = fcntl.fcntl(sys.stdin, fcntl.F_GETFL)
    nonblocking_flags = current_flags | os.O_NONBLOCK
    fcntl.fcntl(sys.stdin, fcntl.F_SETFL, nonblocking_flags)
def create_socket(port, max_conn):
    """Create a non-blocking TCP listening socket bound to localhost.

    :param port: TCP port to bind on 'localhost'.
    :param max_conn: backlog passed to ``listen``.
    :return: the listening socket.
    """
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.setblocking(False)
    listener.bind(('localhost', port))
    listener.listen(max_conn)
    return listener
def read(conn, mask):
    """Selector callback for a client socket: print what was received.

    Receives up to 1024 bytes and prints them with the peer address. An empty
    read clears the module-level ``GO_ON`` flag.

    :param conn: the readable client socket.
    :param mask: event mask from the selector (unused).
    """
    global GO_ON
    peer = conn.getpeername()
    payload = conn.recv(1024)
    print('Got {} from {}'.format(payload, peer))
    if payload:
        return
    GO_ON = False
def accept(sock, mask):
    """Selector callback for the listening socket: accept and register a client.

    The accepted connection is made non-blocking and registered on
    ``m_selector`` for read events with ``read`` as its callback.

    :param sock: the listening socket that became readable.
    :param mask: event mask from the selector (unused).
    """
    client, client_addr = sock.accept()
    client.setblocking(False)
    print('Accepting connection from {}'.format(client_addr))
    m_selector.register(client, selectors.EVENT_READ, read)
def quit():
    """Announce shutdown and clear the module-level ``GO_ON`` flag."""
    global GO_ON
    GO_ON = False
    print('Exiting...')
def from_keyboard(arg1, arg2):
    """Selector callback for stdin: echo the input, or quit on 'quit'.

    :param arg1: the readable file object (``read()`` is called on it).
    :param arg2: event mask from the selector (unused).
    """
    line = arg1.read()
    if line != 'quit\n':
        print('User input: {}'.format(line))
        return
    quit()
# Main loop of the multiplexing example: dispatch stdin and socket events to
# their registered callbacks until a callback clears GO_ON.
GO_ON = True
set_input_nonblocking()

# listen to port 10000, at most 10 connections
server = create_socket(10000, 10)
m_selector.register(server, selectors.EVENT_READ, accept)
m_selector.register(sys.stdin, selectors.EVENT_READ, from_keyboard)

try:
    while GO_ON:
        sys.stdout.write('>>> ')
        sys.stdout.flush()
        for k, mask in m_selector.select():
            callback = k.data
            callback(k.fileobj, mask)
finally:
    # unregister events
    m_selector.unregister(sys.stdin)
    m_selector.unregister(server)
    # close the listening socket. The original called server.shutdown() with
    # no argument, which raises TypeError because `how` is required; a
    # listening socket only needs close().
    server.close()
    # close select
    m_selector.close()
You can test using two terminals.
first terminal:
$ python3 test.py
>>> bla
open another terminal and run:
$ nc localhost 10000
hey!
back to the first
>>> qwerqwer
Result (seen on the main terminal):
$ python3 test.py
>>> bla
User input: bla
>>> Accepting connection from ('127.0.0.1', 39598)
>>> Got b'hey!\n' from ('127.0.0.1', 39598)
>>> qwerqwer
User input: qwerqwer
>>>
#-----------------------------------------------------------------------
# Get a character from the keyboard. If Block is True wait for input,
# else return any available character or throw an exception if none is
# available. Ctrl+C isn't handled and continues to generate the usual
# SIGINT signal, but special keys like the arrows return the expected
# escape sequences.
#
# This requires:
#
# import sys, select
#
# This was tested using python 2.7 on Mac OS X. It will work on any
# Linux system, but will likely fail on Windows due to select/stdin
# limitations.
#-----------------------------------------------------------------------
def get_char(block = True):
    """Return one character read from stdin.

    :param block: when true, wait in ``sys.stdin.read(1)`` for input; when
        false, read only if ``select`` reports stdin as readable right now.
    :return: the string returned by ``sys.stdin.read(1)``.
    :raises select.error: with the message 'NoChar' when ``block`` is false
        and nothing is available.

    NOTE(review): the original raised the name ``error``, which is not defined
    by the ``import sys, select`` this snippet asks for, so the failure
    surfaced as a NameError. ``select.error`` is used as the closest available
    exception type -- confirm this matches what callers expect to catch.
    """
    if block:
        return sys.stdin.read(1)
    readable, _, _ = select.select([sys.stdin], [], [], 0)
    if readable:
        return sys.stdin.read(1)
    raise select.error('NoChar')
This is a posix solution, similar to the answer by swdev.
As they stated, you have to play with termios.VMIN and termios.VTIME to catch more than one char without requiring user to press Enter. Trying to only use raw mode will be a problem as special keys like arrows can mess next keypress.
Here we use tty.setcbreak() or tty.setraw() as a shortcut, but they have short internals.
import termios
import tty
import sys
import select
def get_enter_key():
    """Wait for input on stdin in cbreak mode and return what was typed.

    The terminal attributes are saved first and restored in ``finally``.
    VMIN and VTIME are both set to 0 before waiting in ``select``.

    :return: up to 4095 characters read from stdin, or ``None`` when
        ``select`` reports nothing readable.
    """
    fd = sys.stdin.fileno()
    saved_attrs = termios.tcgetattr(fd)
    try:
        tty.setcbreak(fd)  # use tty.setraw() instead to catch ^C also
        attrs = termios.tcgetattr(fd)
        control_chars = attrs[6]  # index 6 is the control-character list
        control_chars[termios.VMIN] = 0
        control_chars[termios.VTIME] = 0
        termios.tcsetattr(fd, termios.TCSAFLUSH, attrs)
        readable = select.select([fd], [], [])[0]
        if not readable:
            return None
        return sys.stdin.read(4095)
    finally:
        termios.tcsetattr(fd, termios.TCSANOW, saved_attrs)
# Print every chunk of input until the user interrupts with Ctrl+C.
try:
    while True:
        typed = get_enter_key()
        print(typed)
except KeyboardInterrupt:
    print('exiting')
    sys.exit()
note that there are two potential timeouts you could add here:
one is adding last parameter to select.select()
another is playing with VMIN and VTIME
Might I suggest nodelay, if you are willing to use curses?
https://docs.python.org/3/library/curses.html#curses.window.nodelay
You should be able to read from the stream with either
sys.stdin.read(1)
to read utf-8 decoded chars or:
sys.stdin.buffer.read(1)
to read raw chars.
I would do this if I wanted to get raw data from the stdin and do something with it in a timely manner, without reading a newline or filling up the internal buffer first. This is suitable for running programs remotely via ssh where tty is not available, see:
ssh me@host '/usr/bin/python -c "import sys; print(sys.stdin.isatty())"'
There are some other things to think about to make programs work as expected in this scenario. You need to flush the output when you're done to avoid buffering delays, and it could be easy to assume a program hasn't read the input, when you've simply not flushed the output.
stdout.write("my data")
stdout.flush()
But usually it's not the input reading that's the problem but that the terminal (or program) supplying the input stream is not handing it over when you expect, or perhaps it's not reading your output when you expect. If you have a tty to start with (see ssh check above) you can put it into raw mode with the tty module.
import sys
import termios
import tty
# Read a single character from stdin in raw mode, then restore the terminal.
old = termios.tcgetattr(sys.stdin)
c = None
try:
    # setraw sits inside the try so the saved settings are restored even if
    # switching modes fails part-way.
    tty.setraw(sys.stdin)
    # read(1) returns '' at end of input; indexing it with [0] raised
    # IndexError, so fall back to None instead.
    c = sys.stdin.read(1) or None
finally:
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old)
print(c)
... if using Mac/Linux. If using Windows you could use msvcrt.getch().
Use a generator - thankfully sys.stdin is already a generator!
A generator enables you to work on an infinite stream. Always when you call it, it returns the next element. In order to build a generator you need the yield keyword.
# Process stdin line by line as the lines arrive.
for line in sys.stdin:
    # Parenthesised single-argument form prints the same text on Python 2
    # and 3; the print statement is a SyntaxError on Python 3.
    print(line)
    # `a_certain_situation_happens` is a placeholder for the caller's own
    # stop condition.
    if a_certain_situation_happens:
        break
Do not forget to place a break statement into the loop if a certain, wished situation happens.
You can find more information about generators on:
http://www.dabeaz.com/generators/index.html
http://linuxgazette.net/100/pramode.html
Objective: A script that searches a directory for axograph files, performs a series of computations, and either returns the values or saves them to a CSV file.
Update: Data comes from Axographio raw files of intracellular recordings where each file has 4 waves of data. I seek to do the very basic ephys calculations of properties:
input resistance,
half-width at half-maximum,
rheobase, SAG potential,
membrane time constant
afterhyperpolarisation.
I found python module "stimfit" which has useful libraries for this but it does not import properly. I´m currently troubleshooting this package.
This is my first script and first recordings, so if anyone has ideas, please share.
Problem 1. "f = axographio.read(filename)" only takes one argument with a string specific to "filename.axgd". I cant put this method alone since the resulting "file" is a list object and not the single instance of each filenames that I seek. I think that above function can work with "f=axographio.read(file)" if I can get that file object as a list or array of the filenames and extract them.
Problem 1 is now solved with f = axographio.read(os.path.join(root, file)) - it was unclear that os.path worked within the axographio module.
Problem 2. I can print all my files as list of strings but unable to extract and open each single file (even with file=open(blablabla.).
Update: I can either loop for each open file or save them to a list or array. I dont mind advice on best practice on this. axograph seems to work well with NumPy array but list is tempting since it has more flexibility. Any advice on this?
Note: its´s not necessary to make this complicated. The key point is to perform computations on multiple files at my choosing. Whether I save this into file or have more or less control is a matter of taste and time.
# -*- coding: utf-8 -*-
"""
@author: Martenzi
"""
import os,sys
import numpy as np
import matplotlib.pylab as plt
import axographio
from scipy import stats
import re
import os
from collections import defaultdict
""" Search a directory for all Axograph files """
# Walk the current directory tree and plot the four data columns of every
# Axograph (.axgd) file against its first column.
for root, dirs, files in os.walk("."):
    for file in files:
        if file.endswith(".axgd"):
            f = axographio.read(os.path.join(root, file))
            for column in range(1, 5):
                plt.plot(f.data[0], f.data[column])
            # Show the figure only after the traces are drawn. The original
            # called plt.show(file) before plotting and passed the filename,
            # which is not a valid argument for show().
            plt.show()
""" Below is various code snippets that I have tried out. They are not in specific order. I have tried things and stacked it as comments """
# if(line == 'foo'):
# line = next(irofile) #BEWARE, This could raise StopIteration!
# print line
# for i in file:
# cells = [];
# cells.append(file);
# for index, w in enumerate (loopme):
# cells = array ( [i] ,dtype=complex)
# print(file)
# for k in file:
# d = defaultdict(int)
# d[k].append()
# m=self.fileFormatRegex.match(file)
# self.processFile(root, open(os.path.join(r"/", root, file)), age, inString)
# infile = open(cells,"r")
# fullpath = os.path.walk(files)
# infile = open(file, "r")
# f = open(file ["r"][buffering])
# with open(infile, mode='r'):
## f = axographio.read(file)
# print(file)
# f = axographio.read(cell)
# with open(fullpath, 'r') as f:
# f = axographio.read(file)
# data = re.sub(r'(\s*function\s+.*\s*{\s*)',
# r'\1echo "The function starts here."',
# f.read())
# with open(fullpath, 'w') as f:
# f.write(data)
I'm trying to compile some Python 3.3 code using cx_freeze and, after compiling, the resulting test.exe file will create an indefinite number of instances of the program, causing my Windows 7 system to become unstable. It works just as intended when just running in Python, but once compiled it causes issues. Here are my imports in my main script:
import sys
from multiprocessing import Pool, Queue
from threading import Thread
from time import sleep, time
from inspect import getmembers
from PyQt5 import QtWidgets, QtCore, QtGui
from main_ui import Ui_MainWindow # Generated UI from pyuic, imports
# QtWidgets, QtCore, and QtGui
from devices import Device1, Device2 # Both are serial.Serial objects
The setup.py script:
import sys
from cx_Freeze import setup, Executable
# cx_Freeze build configuration for the PyQt5 application (main.pyw -> test.exe).
product_name = 'Product Name'

# (source, destination) pair that copies the Qt platform plugin next to the
# executable. Raw strings keep the backslashes literal; in plain strings
# sequences such as "\P" and "\s" are invalid escapes that newer Python
# versions warn about.
path_platforms = (r"C:\Python33\Lib\site-packages\PyQt5\plugins\platforms\qwindows.dll",
                  r"platforms\qwindows.dll")

includes = ['PyQt5.QtWidgets', 'PyQt5.QtCore', 'PyQt5.QtGui']
include_files = [path_platforms]
excludes = ['_gtkagg', '_tkagg', 'bsddb', 'curses', 'email', 'pywin.debugger',
            'pywin.debugger.dbgcon', 'pywin.dialogs', 'tcl',
            'Tkconstants', 'Tkinter']
packages = ['os']
path = []

bdist_msi_options = {'add_to_path': False}

build_exe_options = {'includes': includes,
                     'include_files': include_files,
                     'excludes': excludes,
                     'packages': packages,
                     'path': path,
                     'silent': True}

# Use the GUI base on Windows; None elsewhere.
base = None
if sys.platform == 'win32':
    base = 'Win32GUI'

exe = Executable(script='main.pyw',
                 base=base,
                 targetName='test.exe')

setup(name=product_name,
      version='1.0',
      description='The Test Program',
      executables=[exe],
      options={'bdist_msi': bdist_msi_options, 'build_exe': build_exe_options})
And when I run python setup.py build, the following error occurs:
Missing modules:
? System imported from serial.serialcli
? TERMIOS imported from serial.serialposix
? __main__ imported from bdb
? _gestalt imported from platform
? _posixsubprocess imported from subprocess
? clr imported from serial.serialcli
Despite these errors, it still generates a test.exe file. When I execute it, it generates a seemingly infinite number of windows and the only way to stop it is to hard reset the computer. Again, the main script works just fine running under Python, but fails once compiled. Any help would be greatly appreciated!
EDIT: As requested, here is my main script:
import sys
from multiprocessing import Pool, Queue, freeze_support
from threading import Thread
from time import sleep, time
from inspect import getmembers
from PyQt5 import QtWidgets, QtCore, QtGui
from main_ui import Ui_MainWindow # Generated by pyuic
import parts # Imports time.sleep, datetime.datetime, and threading.Thread
from devices import GwPowerSupply, DataQ # Imports time.sleep and serial.Serial
# GwPowerSupply is a serial.Serial object to handle communications with a GwInstek PSP-603
# DataQ is also a serial.Serial object to handle communications with a DataQ-155
def file_logger(message):
    """Append ``message`` to log.txt, prefixed with the current ``time()`` value.

    A local switch (always on) guards the write.

    :param message: value formatted into the log line.
    """
    enabled = True
    if not enabled:
        return
    line = '{}: {}\n'.format(time(), message)
    with open('log.txt', 'a') as log_file:
        log_file.write(line)
def compute():
    """
    A function, designed as an independent process, to gather data from the DataQ and Power Supply
    input queues, convert to human values, and output as a single queue.

    Reads items from ``compute.input_queue`` and dispatches on their length:
    four values are pressures and flows, two are voltage and current, one is
    the leakage offset. After each accepted item the accumulated dict is put
    on ``compute.output_queue``. An item of any other length ends the
    function; ``None`` items are skipped.
    """
    compute.running = True
    compute.paused = False
    # The initial dict to pass on to the queue
    data_dict = {'upstream': 0, 'downstream': 0, 'high_flow': 0, 'low_flow': 0, 'voltage': 0, 'current': 0, 'offset': 0}
    while compute.running:
        if compute.paused or compute.input_queue.empty():
            # Sleep briefly instead of spinning; the original `continue`
            # alone kept one CPU core fully busy while idle.
            sleep(0.001)
            continue
        # Get the raw voltage data and convert to pressure/flow
        analog_input = compute.input_queue.get()
        file_logger('Compute received {}'.format(analog_input))
        if analog_input is None:
            continue
        # Four items comes from the DataQ for pressures and flow
        if len(analog_input) == 4:
            # Pressure Transducers are both 1-11V, 0-500 PSI
            if isinstance(analog_input[0], (float, int)):
                data_dict['upstream'] = (analog_input[0]-1) * 50
            if isinstance(analog_input[1], (float, int)):
                data_dict['downstream'] = (analog_input[1]-1) * 50
            # High Flow is 0-5V, 0-1000 Liters/min
            if isinstance(analog_input[2], (float, int)):
                data_dict['high_flow'] = (analog_input[2]*200) * .035147  # Convert SLM to SCFM
            # Low Flow is 0-5V, 0-5 Liters/min
            if isinstance(analog_input[3], (float, int)):
                data_dict['low_flow'] = analog_input[3] * 1000  # Convert SLM to SCCM
        # Two items are from the power supply for voltage and current
        elif len(analog_input) == 2:
            if isinstance(analog_input[0], (float, int)):
                data_dict['voltage'] = analog_input[0]
            if isinstance(analog_input[1], (float, int)):
                data_dict['current'] = analog_input[1]
        # A single item is the offset from the Valve program
        elif len(analog_input) == 1:
            data_dict['offset'] = analog_input[0]
        else:
            # Any other length (e.g. the empty list the producers put when
            # they stop) ends this process function.
            return
        compute.output_queue.put(data_dict)
        file_logger('Compute put out {}'.format(data_dict))
def data_q_producer():
    """
    A function, designed as an independent process, to gather data from the DataQ and feed it
    to the computing process.

    Opens the DataQ on 'COM4' and loops forever: while the device is neither
    paused nor stopped, one reading is put on ``data_q_producer.queue`` (or on
    ``data_q_producer.response_queue`` when ``data_q.response`` is set). Then
    at most one pending command from ``data_q_producer.output`` is handled.
    The 'stop' command closes the device, puts an empty list on the queue and
    returns.
    """
    # Initialize COM port
    data_q = DataQ('COM4')
    data_q.start()
    # Continuously gather data
    while True:
        if not data_q.paused and not data_q.stopped:
            # Gather data and put to queue, either for response or normal
            file_logger('Getting Data from DataQ')
            if data_q.response:
                data = data_q.get_response_data()
                data_q_producer.response_queue.put(data)
            else:
                data = data_q.get_data()
                data_q_producer.queue.put(data)
            file_logger('Got {} from DataQ'.format(data))
        # If a command is received, such as to energize a relay, handle
        if not data_q_producer.output.empty():
            output = data_q_producer.output.get()
            file_logger('Sending {} to DataQ'.format(output))
            # Strings are to stop, run response, etc.
            if isinstance(output, str):
                if output == 'stop':
                    data_q.set_output(0, 0, 0, 0)
                    data_q.stop()
                    data_q.close()
                    # Empty list pushed as the last item before returning
                    data_q_producer.queue.put([])
                    return
                elif output == 'start resp':
                    data_q.response = True
                    data_q.pause()
                    data_q.start_resp()
                    data_q.start()
                elif output == 'stop resp':
                    print('Stopping Response Test')
                    data_q.pause()
                    data_q.setup()
                    data_q.start()
                    data_q.response = False
            # A single float is the new leakage offset; it is forwarded as a
            # one-item list.
            # NOTE(review): the original comment said "integer" but the check
            # is for float, so an int offset is ignored -- confirm intent.
            elif isinstance(output, float):
                data_q_producer.queue.put([output, ])
            # A List is to set the digital outputs
            elif isinstance(output, list):
                data_q.set_output(output[0], output[1], output[2], output[3])
def pps_producer():
    """
    A function, designed as an independent process, to gather data from the Power Supply and feed it
    to the computing process.

    Opens the power supply on 'COM1', switches its relay on, and loops
    forever: each pass reads 'V' and 'A', puts ``[voltage, current]`` on
    ``pps_producer.queue``, then handles at most one pending command from
    ``pps_producer.output``. The 'stop' command switches the relay off, closes
    the port, puts an empty list on the queue and returns.
    """
    # Initialize COM port
    pps = GwPowerSupply('COM1')
    pps.set_relay(True)
    # Continuously gather voltage and current readings
    while True:
        file_logger('Getting Data from Power Supply')
        voltage = pps.get_value('V')
        current = pps.get_value('A')
        file_logger('Got {}V, {}A from power supply'.format(voltage, current))
        pps_producer.queue.put([voltage, current])
        # If a command is received to change voltage, current, etc.; handle
        if not pps_producer.output.empty():
            output = pps_producer.output.get()
            file_logger('Got {} for Power Supply'.format(output))
            # Bool is to set the relay on or off
            if isinstance(output, bool):
                pps.set_relay(output)
            # String is primarily to stop the power supply (set the relay to Off)
            elif isinstance(output, str) and output == 'stop':
                pps.set_relay(False)
                pps.close()
                # Empty list pushed as the last item before returning
                pps_producer.queue.put([])
                return
            # A tuple is changing a power supply output setting
            # NOTE(review): any other value, including a string other than
            # 'stop', also lands here and is indexed -- confirm callers only
            # send two-item sequences.
            else:
                pps.set_value(output[0], output[1])
def pool_init(input_queue, output_queue, data_q_out, pps_out, response_queue):
    """
    Pool initializer: attach the shared queues to the worker functions as
    function attributes.
    see http://stackoverflow.com/a/3843313/852994 for more details

    :param input_queue: queue both producers write to and compute reads from.
    :param output_queue: queue compute writes to.
    :param data_q_out: command queue read by data_q_producer.
    :param pps_out: command queue read by pps_producer.
    :param response_queue: queue data_q_producer writes response data to.
    """
    # Computing process
    compute.input_queue = input_queue
    compute.output_queue = output_queue
    # DataQ producer
    data_q_producer.queue = input_queue
    data_q_producer.response_queue = response_queue
    data_q_producer.output = data_q_out
    # Power supply producer
    pps_producer.queue = input_queue
    pps_producer.output = pps_out
class MainGui(QtWidgets.QMainWindow):
"""
The Main interface builder for the program
"""
def __init__(self):
    """Build the window, the leakage-offset lookup table, the queues, the
    refresh timer and the worker process pool."""
    # Initialize MainGui and create the window
    super(MainGui, self).__init__()
    self.ui = Ui_MainWindow()
    self.ui.setupUi(self)
    # The current valve part being tested
    self.valve = None
    # Disables the 'Energize' button when running ATP
    self.auto_mode = False
    # The current measured leakage offset based on the current run's test
    self.measured_offset = 0
    # The leakage offset table based on initial testing
    # #TODO: retest offsets and go to 450 PSI
    self.offset_table = ((-50, 30), (0, 31), (50, 44), (100, 37), (150, 41), (200, 44),
                         (250, 49), (300, 54), (350, 63), (400, 72), (450, 81))
    # A table of calculated leakage offsets to give single-incremental points based on the
    # above tested values. Entry k belongs to x = offset_table[0][0] + k.
    # Each segment is interpolated between a table point and the NEXT one;
    # the original ranged up to offset_table[i-1], which stretched the first
    # segment over the whole range and left every other segment empty.
    self.calculated_offsets = []
    for (x1, y1), (x2, y2) in zip(self.offset_table, self.offset_table[1:]):
        for x in range(x1, x2):
            y = ((x-x1) * (y2-y1)) / (x2-x1) + y1
            self.calculated_offsets.append(y)
    # Connect UI clicks and presses to commands
    self.ui.btn_all.clicked.connect(lambda: self.select_all_tests(True))
    self.ui.btn_none.clicked.connect(lambda: self.select_all_tests(False))
    self.ui.comboBox.currentTextChanged.connect(self.select_part)
    self.ui.btn_energize.clicked.connect(self.energize)
    self.ui.btn_start.clicked.connect(self.start_tests)
    self.ui.btn_skip.clicked.connect(self.skip_press)
    # Select the initial part
    self.select_part()
    # Initialize queues
    self.input_queue = Queue(10)
    self.output_queue = Queue(10)
    self.data_q_out = Queue(10)
    self.pps_out = Queue(10)
    self.response_queue = Queue(400)
    self.test_queue = Queue(5)
    self.log_queue = Queue(10)
    # Initialize timer to update on-screen values
    self.timer = QtCore.QTimer()
    self.timer.timeout.connect(self.update_data)
    self.timer.start(25)
    # Initialize process pool
    self.pool = Pool(processes=4, initializer=pool_init,
                     initargs=(self.input_queue, self.output_queue, self.data_q_out,
                               self.pps_out, self.response_queue))
    # Place the data producing functions into the process pool
    self.pool.apply_async(func=data_q_producer)
    self.pool.apply_async(func=compute)
    self.pool.apply_async(func=pps_producer)
def closeEvent(self, *args, **kwargs):
    """On window close: stop the timer, queue the stop commands for the test
    and both producers, then wait half a second."""
    # Verify COM ports are closed properly before exiting
    file_logger('Attempting Exit')
    self.timer.stop()
    shutdown_messages = (
        (self.test_queue, 'ESC'),
        (self.data_q_out, 'stop'),
        (self.pps_out, 'stop'),
    )
    for target_queue, message in shutdown_messages:
        target_queue.put(message)
    sleep(.5)
    file_logger('Exited')
def keyPressEvent(self, event):
    """Translate key presses into test-queue messages.

    Return puts the number typed in the line edit on the test queue,
    Backspace puts 'ESC' and S puts 'SKIP'. Other keys are only logged.

    :param event: the key event; only ``event.key()`` is used.
    """
    key = event.key()
    file_logger('Keypress Event: {}'.format(key))
    # Capture different key presses for different functions
    if key == QtCore.Qt.Key_Return:
        entry = self.ui.lineEdit.text()
        try:
            value = float(entry)
        except ValueError:
            # An empty or non-numeric entry used to raise out of the event
            # handler; log it and ignore the key press instead.
            file_logger('Ignored non-numeric entry: {!r}'.format(entry))
            return
        self.test_queue.put(value)
    elif key == QtCore.Qt.Key_Backspace:
        self.test_queue.put('ESC')
    elif key == QtCore.Qt.Key_S:
        self.test_queue.put('SKIP')
def skip_press(self):
    """Log the skip press and put 'SKIP' on the test queue."""
    file_logger('Skip press Event')
    skip_message = 'SKIP'
    self.test_queue.put(skip_message)
def print_to_log(self, text):
    """Append *text* to the on-screen log and scroll to the end of it."""
    log_view = self.ui.log_output
    log_view.append(text)
    # Moving the text cursor to the end makes the view follow the new line
    end_cursor = log_view.textCursor()
    end_cursor.movePosition(QtGui.QTextCursor.End)
    log_view.setTextCursor(end_cursor)
def update_data(self):
    """Timer slot: refresh the status read-outs and the on-screen log.

    Takes at most one item from ``output_queue`` and at most one item from
    ``log_queue`` per call.
    """
    # --- Status boxes ---
    if not self.output_queue.empty():
        file_logger('Update Interface Event')
        readings = self.output_queue.get()

        # Before calculating corrected leakage, get the offset
        self.measured_offset = readings['offset']
        # Modify low flow with the offset; the upstream reading (truncated to
        # an int) is the index into the precomputed offsets
        correction = (self.measured_offset
                      - self.calculated_offsets[int(readings['upstream'])])
        readings['low_flow'] -= correction

        # Update the status on the UI: (label widget, format, reading key)
        status_fields = (
            (self.ui.upstream_pressure, '{:.1f}', 'upstream'),
            (self.ui.downstream_pressure, '{:.1f}', 'downstream'),
            (self.ui.flow_sensor, '{:.2f}', 'high_flow'),
            (self.ui.leakage_sensor, '{:.0f}', 'low_flow'),
            (self.ui.voltage, '{:.2f}', 'voltage'),
            (self.ui.current, '{:.3f}', 'current'),
        )
        for label, fmt, key in status_fields:
            label.setText(fmt.format(readings[key]))

        # Pass the values on to the test queue so the ATP process can use
        # them; drop one entry once the queue has filled up
        self.test_queue.put(readings)
        if self.test_queue.full():
            self.test_queue.get()
        file_logger('Updated Interface')

    # --- Log ---
    if self.log_queue.empty():
        return
    text = self.log_queue.get()
    file_logger('Printing to log: {}'.format(text))
    # For the countdown timer, delete the previous line, but not the first count!
    if isinstance(text, int) and text != 1:
        cursor = self.ui.log_output.textCursor()
        cursor.movePosition(QtGui.QTextCursor.End, QtGui.QTextCursor.MoveAnchor)
        cursor.movePosition(QtGui.QTextCursor.StartOfLine, QtGui.QTextCursor.KeepAnchor)
        cursor.removeSelectedText()
        # Delete last newline character so the number doesn't print on the next line
        cursor.deletePreviousChar()
    self.print_to_log(str(text))
    file_logger('Printed to log: {}'.format(text))
def select_all_tests(self, state=True):
    """Set the selected flag of every entry in the test list to *state*.

    Call with ``state=False`` to deselect everything.
    """
    test_list = self.ui.listWidget
    for row in range(len(test_list)):
        entry = test_list.item(row)
        entry.setSelected(state)
def select_part(self):
    """Load the part chosen in the combo box and rebuild the test list.

    Scans the members of ``parts`` whose name contains 'Part', takes the
    first one whose ``part_name`` contains the combo box text, stores an
    instance of it in ``self.valve``, repopulates and pre-selects the test
    list, and sets up the coil check boxes.
    """
    part_name = self.ui.comboBox.currentText()
    for member_name, member in getmembers(parts):
        # Only objects labelled as 'Part' are candidates
        if 'Part' not in member_name:
            continue
        # Skip parts whose name does not correspond to the selected part
        if part_name not in member().part_name:
            continue

        self.valve = member()

        # Clear out the current contents of the test list
        self.select_all_tests(False)
        self.ui.listWidget.clear()
        # Update test list with new tests, then pre-select all of them
        for step in self.valve.procedure:
            self.ui.listWidget.addItem(step[0])
        self.select_all_tests()

        # Set coils up properly; if there is only one coil in the unit,
        # disable the second coil
        self.ui.coil_1.setChecked(True)
        single_coil = self.valve.coils < 2
        if single_coil:
            self.ui.coil_2.setChecked(False)
            self.ui.coil_2.setEnabled(False)
        else:
            self.ui.coil_2.setEnabled(True)
            self.ui.coil_2.setChecked(True)
        return
def energize(self):
    """Slot for the energize button; does nothing while a test is running.

    When the button is checked, the states of the two coil check boxes are
    sent on ``data_q_out``; when it is unchecked, both coils are sent as 0.
    """
    if self.auto_mode:
        return
    if not self.ui.btn_energize.isChecked():
        self.data_q_out.put([0, 0, 2, 2])
        return
    # checkState() / 2 maps the check state value 2 to 1 and 0 to 0
    coil_states = [int(box.checkState() / 2)
                   for box in (self.ui.coil_1, self.ui.coil_2)]
    self.data_q_out.put(coil_states + [2, 2])
def start_tests(self):
    """Slot for the start button: clear the log and run the tests in a thread."""
    file_logger('Starting Tests')
    self.ui.log_output.setHtml('')
    # Daemon thread, so a running test sequence never keeps the process alive
    worker = Thread(target=self.run_tests)
    worker.daemon = True
    worker.start()
def run_tests(self):
    """Run every selected test of the current part's procedure, in order.

    The start button and the part combo box are disabled for the duration
    and always re-enabled afterwards, including when a test raises or the
    user escapes. Deselected tests are reported as 'Skipping' in the log.
    An escape aborts the remaining tests; a skip moves on to the next one.
    """
    # Don't let the user try to start while running nor change the part number mid-test!
    self.ui.btn_start.setEnabled(False)
    self.ui.comboBox.setEnabled(False)
    line = '-----------------------------------------------'
    try:
        for test in self.valve.procedure:
            test_name = test[0]
            # Find the list entry for this test; only its first match counts
            for i in range(len(self.ui.listWidget)):
                item = self.ui.listWidget.item(i)
                if item.text() != test_name:
                    continue
                # If the test is not selected, notify user by displaying 'Skipping'
                if not item.isSelected():
                    self.log_queue.put('<b>{1}</b>\r\nSkipping {0}'.format(test_name, line))
                    break

                file_logger('Testing {}'.format(test_name))
                self.log_queue.put('<b>{1}\r\nRunning {0}\r\n{1}</b> '.format(test_name, line))
                test[1](self.log_queue, self.test_queue, self.pps_out,
                        self.data_q_out, self.response_queue)

                # Tell the user of an escape or a skip
                if self.valve.escape:
                    file_logger('Escaped {}'.format(test_name))
                    self.log_queue.put('<b><font color="blue">Escaped</b></font> ')
                    self.valve.escape = False
                    # If escaping, break out of all loops
                    return
                if self.valve.skip:
                    file_logger('Skipped {}'.format(test_name))
                    self.log_queue.put('<b><font color="orange">Skipped</b></font> ')
                    self.valve.skip = False
                else:
                    file_logger('Test Successful')
                # Once the test is found, break out of the test name matching loop
                break
    finally:
        # Re-enable starting tests and selecting part numbers
        self.ui.btn_start.setEnabled(True)
        self.ui.comboBox.setEnabled(True)
if __name__ == '__main__':
    # freeze_support() has to run before anything else in the main module so
    # that multiprocessing works when the program is frozen into an executable
    freeze_support()
    file_logger('####### NEW RUN #######\n')
    app = QtWidgets.QApplication(sys.argv)
    window = MainGui()
    window.show()
    # exec_() blocks until the GUI exits, so the end marker is written only
    # after the event loop has finished
    exit_code = app.exec_()
    file_logger('####### END RUN #######\n')
    sys.exit(exit_code)
You need to add the following code to your main, before anything else:
from multiprocessing import freeze_support
freeze_support()
See this stackoverflow post
I am trying to convert an mbox file to a JSON structure suitable for import into MongoDB.
I am using the mailbox chapter of Mining the Social Web (second edition), but it's not working properly.
I am trying to convert an mbox file to a JSON structure suitable for import into MongoDB.
I am using the mailbox chapter of Mining the Social Web (second edition), but it's not working properly.
import sys
import mailbox
import email
import quopri
import json
import time
from BeautifulSoup import BeautifulSoup
from dateutil.parser import parse
# Path of the mbox file to convert
MBOX = 'resources/ch06-mailboxes/data/enron.mbox'
# Output path: the input path with '.json' appended
OUT_FILE = MBOX + '.json'
def cleanContent(msg):
    """Return the text of *msg* with quoted-printable decoded and HTML removed.

    Returns an empty string when BeautifulSoup cannot parse the content.
    """
    # Decode message from "quoted printable" format, but first
    # re-encode, since decodestring will try to do a decode of its own
    decoded = quopri.decodestring(msg.encode('utf-8'))

    # Strip out HTML tags, if any are present.
    # Bail on unknown encodings if errors happen in BeautifulSoup; catching
    # Exception (not a bare except) lets Ctrl-C and SystemExit propagate.
    try:
        soup = BeautifulSoup(decoded)
    except Exception:
        return ''
    return ''.join(soup.findAll(text=True))
# There's a lot of data to process, and the Pythonic way to do it is with a
# generator. See http://wiki.python.org/moin/Generators.
# Using a generator requires a trivial encoder to be passed to json for object
# serialization.
class Encoder(json.JSONEncoder):
    """JSON encoder that serializes otherwise-unsupported iterables as lists."""

    def default(self, o):
        """Materialize *o* (e.g. a generator) into a list for json."""
        materialized = list(o)
        return materialized
# The generator itself...
def gen_json_msgs(mb):
    """Yield the JSON-ready dict for each message in *mb*.

    Messages are pulled one at a time with ``mb.next()``; iteration stops
    when it returns None.
    """
    msg = mb.next()
    while msg is not None:
        yield jsonifyMessage(msg)
        msg = mb.next()
def jsonifyMessage(msg):
    """Convert an email message into a JSON-serializable dict.

    The result holds every header as a key, the To/Cc/Bcc headers split
    into lists, a 'parts' list with the content type and cleaned content of
    each text part, and 'Date' rewritten as ``{'$date': <epoch millis>}``.
    Non-text MIME parts are skipped with a notice on stderr.
    """
    json_msg = {'parts': []}
    for (k, v) in msg.items():
        json_msg[k] = v.decode('utf-8', 'ignore')

    # The To, Cc, and Bcc fields, if present, could have multiple items.
    # Note that not all of these fields are necessarily defined.
    for k in ['To', 'Cc', 'Bcc']:
        if not json_msg.get(k):
            continue
        json_msg[k] = json_msg[k].replace('\n', '').replace('\t', '').replace('\r', '')\
                                 .replace(' ', '').decode('utf-8', 'ignore').split(',')

    for part in msg.walk():
        json_part = {}
        if part.get_content_maintype() != 'text':
            # The message must stay one string literal; splitting it across
            # physical lines without continuation is a syntax error
            print >> sys.stderr, ("Skipping MIME content in JSONification "
                                  "({0})").format(part.get_content_maintype())
            continue

        json_part['contentType'] = part.get_content_type()
        content = part.get_payload(decode=False).decode('utf-8', 'ignore')
        json_part['content'] = cleanContent(content)
        json_msg['parts'].append(json_part)

    # Finally, convert date from asctime to milliseconds since epoch using the
    # $date descriptor so it imports "natively" as an ISODate object in MongoDB
    then = parse(json_msg['Date'])
    millis = int(time.mktime(then.timetuple())*1000 + then.microsecond/1000)
    json_msg['Date'] = {'$date' : millis}
    return json_msg
# Both files are opened in `with` blocks so they are closed even if
# conversion of a message fails part-way through.
with open(MBOX, 'rb') as mbox_file:
    mbox = mailbox.UnixMailbox(mbox_file, email.message_from_file)

    # Write each message out as a JSON object on a separate line
    # for easy import into MongoDB via mongoimport
    with open(OUT_FILE, 'w') as f:
        for msg in gen_json_msgs(mbox):
            if msg is not None:
                f.write(json.dumps(msg, cls=Encoder) + '\n')

print("All done")
getting error:
80 # for easy import into MongoDB via mongoimport
81
---> 82 f = open(OUT_FILE, 'w')
83 for msg in gen_json_msgs(mbox):
84 if msg != None:
IOError: [Errno 13] Permission denied: 'resources/ch06-mailboxes/data/enron.mbox.json'
The code you mentioned became obsolete in the third edition of Mining the Social Web.
I tried making a workable script that not only converts MBOX to JSON but also extracts the attachments into usable formats.
Link to the repo -
https://github.com/PS1607/mbox-to-json
Read the README file for usage instructions.
It seems that your problem is related to user permissions instead of Python. Line 82 tries to open a file in the "data" folder, but permission was denied. You should try executing your script using the sudo command from a terminal:
sudo python3 <your script name>
This should take care of the error you pointed out.
PS: Python 3 uses print as a function; line 88 should read
print('All done')