NiFi ExecuteGroovyScript unable to resolve class JSch - groovy

The Groovy script below is used to get a file from a remote machine using SFTP. I need to get the file using SFTP.
import groovy.json.JsonSlurper
import com.jcraft.jsch.*

java.util.Properties config = new java.util.Properties()
config.put "StrictHostKeyChecking", "no"
JSch ssh = new JSch()
def rfile = "/path/to/remote/file/on/remote/host";
Session sess = ssh.getSession 'user', 'host', 22
sess.with {
    setConfig config
    setPassword password
    connect()
    Channel chan = openChannel "sftp"
    chan.connect()
    ChannelSftp sftp = (ChannelSftp) chan;
    def flowFile = session.get()
    if (!flowFile) return
    flowFile.write { rawIn, rawOut ->
        def keyValueList = rawIn.withReader("UTF-8") { new JsonSlurper().parse(it) }
        sftp.get("rfile/abc.txt").withReader("UTF-8") { reader ->
            rawOut.withWriter("UTF-8") { writer ->
                reader.eachLine { line ->
                    keyValueList.each { if (it.Key) line = line.replaceAll(it.Key, it.Value) }
                    writer << line << '\n'
                }
            }
        }
    }
    chan.disconnect()
    disconnect()
    REL_SUCCESS << flowFile
}
Getting error:
unable to resolve class JSch @ line 7, column 6. JSch ssh = new JSch() org.codehaus.groovy.syntax.SyntaxException
I am using import com.jcraft.jsch.*, but it seems the JSch class is not available or the import is not correct.

You have to download the JSch library from http://www.jcraft.com/jsch/ and put the jar into the nifi/lib directory.
Or, if your NiFi server has access to the internet, you can use this script annotation to download the library from a public repository:
@Grab(group='com.jcraft', module='jsch', version='0.1.55')
import com.jcraft.jsch.*
...
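For reference, a minimal sketch of how the top of the posted script could look with the Grape annotation attached to the import (assuming the NiFi JVM can reach a public Maven repository; the rest of the script stays as in the question):
@Grab(group='com.jcraft', module='jsch', version='0.1.55')
import com.jcraft.jsch.JSch
import com.jcraft.jsch.Session
import com.jcraft.jsch.Channel
import com.jcraft.jsch.ChannelSftp
import groovy.json.JsonSlurper
// ... session setup, sftp.get(...) and flowFile.write { ... } unchanged ...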

Related

Is there a way to use Spark to load a file in FTP using TLS

I am in the process of moving a Python process to Spark. In Python we are using ftplib to connect and download a file to an EC2 instance. Once the file is downloaded, we upload it to S3. We are transitioning to serverless infrastructure and would like to load the file in Spark via AWS Glue and then use multi-part upload to move it to S3. I have tried to just run the current code on a larger Glue instance type, but the machine still runs out of memory (20gb file).
Old Python code:
"""
This script will get the backup file
"""
import sys
from datetime import datetime
import re
import ftplib
from retry import retry
import shutil
from tools.python.s3_functions import s3_upload
from python_scripts.get import *
def get_ftp_connector(path, user, password):
ftp = ftplib.FTP_TLS(path)
ftp.login(user, password)
ftp.prot_p()
return ftp
def get_ftp_files_list(ftp, dir):
ftp.cwd(dir)
files = ftp.nlst()
print(str("-".join(files)))
if "filecompleted.txt" not in files:
print("Failed to find filescompleted.txt file in ftp server.")
raise Exception("Failed to find filescompleted.txt file in ftp server.")
regex_str = 'Backup_File_Mask_Goes_here([\d]{8}).bak'
find_date_regex = re.compile(regex_str)
searched = [(f, find_date_regex.match(f)) for f in files if find_date_regex.match(f)]
searched = \
[(file_name, datetime.strptime(regex_result.groups()[0], '%Y%m%d')) for file_name, regex_result in searched]
searched = sorted(searched, key=lambda elem: elem[1], reverse=True)
if not searched:
print("Failed to find appropriate file in ftp server.")
raise Exception("Failed to find appropriate file in ftp server.")
return searched[0]
class FtpUploadTracker:
size_written = 0
total_size = 0
last_shown_percent = "X"
def __init__(self, total_size, bk_file):
self.total_size = total_size
self.bk_file = bk_file
self.output_file = open(self.bk_file, 'wb')
self.start_time = datetime.now()
def handle(self, block):
self.size_written += len(block)
percent_complete = str(round((self.size_written / self.total_size) * 100, 1))
self.output_file.write(block)
time_elapsed = (datetime.now() - self.start_time).total_seconds()
speed = round(self.size_written / (1000 * 1000 * time_elapsed), 2)
msg = "{percent}% complete # average speed of {speed}MB/s : total run time {minutes}m".\
format(percent=percent_complete, speed=speed, minutes=round(time_elapsed/60))
if time_elapsed > 600 and speed < 1:
print("Zombie connection, failing dl.")
raise Exception("Zombie connection, failing dl.")
if self.last_shown_percent != percent_complete:
self.last_shown_percent = percent_complete
print(msg)
def close(self):
self.output_file.close()
#retry(tries=4, delay=300)
def retrieve_db():
"""
This function will retrieve via FTP the backup
:return: None
"""
ftp = get_ftp_connector(FTP_PATH, FTP_USER, FTP_PASSWORD)
# return back the most recent entry
file_name, file_date = get_ftp_files_list(ftp, 'database')
file_epoch = (file_date - datetime(1970, 1, 1)).total_seconds()
new_file_name = "backup_{epoch}.bak".format(epoch=str(int(file_epoch)))
if os.path.exists(DATAFILEPATH):
shutil.rmtree(DATAFILEPATH)
if not os.path.exists(DATAFILEPATH):
os.makedirs(DATAFILEPATH)
temp_backup_file_location = os.path.join(DATAFILEPATH + new_file_name)
print("Found file {file_name}, and downloading it to {loc}".
format(file_name=file_name, loc=temp_backup_file_location))
ftp_handler = FtpUploadTracker(ftp.size(file_name), temp_backup_file_location)
ftp.retrbinary("RETR " + file_name, ftp_handler.handle)
ftp.quit()
ftp_handler.close()
print("Finished download. Uploading to S3.")
s3_upload(DATAFILEPATH, new_file_name, bucket, "db_backup")
os.remove(temp_backup_file_location)
def main():
try:
retrieve_db()
except Exception as e:
print("Failed to download backup after 4 tries with error {e}.".format(e=e))
return 1
return 0
if __name__ == "__main__":
rtn = main()
sys.exit(rtn)
New Spark code (in progress): The username has a | character, which made me encode the URI. When I run the code, I get a connection refused. I am able to use the same connection info from Python.
from pyspark import SparkContext
from pyspark import SparkFiles
import urllib
sc = SparkContext()
ftp_path = "ftp://Username:password@ftplocation.com/path_to_file"
file_path_clean = urllib.parse.urlencode(ftp_path, safe='|')
print(f"file_path_clean: {file_path_clean}")
sc.addFile(ftp_path)
filename = SparkFiles.get(file_path.split('/')[-1])
print(f"filename: {filename}")
rdd = sc.textFile("file://" + filename)
print("We got past rdd = sc.textFile(file:// + filename)")
rdd.take(10)
rdd.collect()
print(rdd)
There are three ways to approach the problem:
Use a mounted file system backed by FTP and write to it from Spark.
Use a Spark to SFTP connector such as spark-sftp.
Write the files with Spark somewhere else and copy them to SFTP as a separate step. Due to the various reliability issues with SFTP, and the fact that Spark leaves partial output behind during failed write operations, this is the path that we've taken. We write terabytes to SFTP endpoints using code that looks like the following in Scala. I hope it can be helpful for your Python work.
/** Defines some high-level operations for interacting with remote file protocols like FTP, SFTP, etc.
  */
trait RemoteFileOperations extends Closeable {
  var backoff: BlockingRetry.Backoff = Backoff.linear(3000)
  var retry: BlockingRetry.Retry = Retry.maxRetries(3)
  var recover: Recovery = recoverable(this)
  var ignore: Ignored = nonRecoverable

  def listFiles(path: String = ""): Seq[FInfo]
  def uploadFile(localPath: String, remoteDirectory: String): Unit
  def downloadFile(localPath: String, remotePath: String): Unit
  def deleteAll(path: String): Unit

  def connect(): Unit = {}
  def disconnect(): Unit = {}
  def reconnect(): Unit = {
    disconnect()
    connect()
  }

  override def close(): Unit = disconnect()

  /** Wraps a block of code and allows it to be retried when [[recoverable()]] conditions
    * are met. [[BlockingRetry.retry()]] is called with the var fields
    * [[backoff]], [[retry]], [[recover]], and [[ignore]], which can all be reconfigured.
    */
  def retryable[A](f: => A): A = {
    BlockingRetry.retry(retry, backoff, recover, ignore) {
      f
    }
  }

  def recoverable(fileOp: RemoteFileOperations): Recovery = {
    case (_: SocketTimeoutException, _: Int) =>
      fileOp.reconnect()
      None
  }

  def nonRecoverable: Ignored = {
    case _: UnknownHostException |
         _: SSLException |
         _: SocketException |
         _: IllegalStateException =>
  }
}

class SSHJClient(host: String, username: String, password: String) extends RemoteFileOperations {
  import net.schmizz.keepalive.KeepAliveProvider
  import net.schmizz.sshj.connection.ConnectionException
  import net.schmizz.sshj.sftp.SFTPClient
  import net.schmizz.sshj.transport.verification.PromiscuousVerifier
  import net.schmizz.sshj.xfer.FileSystemFile
  import net.schmizz.sshj.{DefaultConfig, SSHClient}

  override def listFiles(path: String): Seq[FInfo] = {
    import collection.JavaConverters._
    retryable {
      sftpSession(sftp => {
        sftp.ls(path).asScala
          .filter(f => f.getName != "." && f.getName != "..")
          .map(f => FInfo(f.getPath, f.getParent, f.isDirectory, f.getAttributes.getSize, f.getAttributes.getMtime))
      })
    }
  }

  override def uploadFile(localPath: String, remoteDirectory: String): Unit = {
    retryable {
      sftpSession(sftp => {
        sftp.getFileTransfer.setPreserveAttributes(false)
        sftp.put(new FileSystemFile(localPath), remoteDirectory)
      })
    }
  }

  override def downloadFile(localPath: String, remotePath: String): Unit = {
    retryable {
      sftpSession(sftp => {
        sftp.getFileTransfer.setPreserveAttributes(false)
        sftp.get(remotePath, new FileSystemFile(localPath))
      })
    }
  }

  override def deleteAll(path: String): Unit =
    throw new UnsupportedOperationException("#deleteAll is unsupported for SSHJClient")

  private def sftpSession[A](f: SFTPClient => A): A = {
    val defaultConfig = new DefaultConfig()
    defaultConfig.setKeepAliveProvider(KeepAliveProvider.KEEP_ALIVE)
    val ssh = new SSHClient(defaultConfig)
    try {
      // This is equivalent to StrictHostKeyChecking=no which is disabled since we don't usually know
      // the SSH remote host key ahead of time.
      ssh.addHostKeyVerifier(new PromiscuousVerifier())
      ssh.connect(host)
      ssh.authPassword(username, password)
      val sftp = ssh.newSFTPClient()
      try {
        f(sftp)
      } finally {
        sftp.close()
      }
    } finally {
      ssh.disconnect()
    }
  }

  override def recoverable(fileOp: RemoteFileOperations): Recovery = {
    super.recoverable(fileOp).orElse {
      case (e: ConnectionException, _: Int) =>
        println(s"Recovering session from exception: $e")
        None
    }
  }
}

Compile Groovy Script from command prompt which contains soapui libraries

I am trying to create a script library and compile a groovy class as described in https://stackoverflow.com/a/35498212/9997207
Groovy Script
import groovy.json.*
import com.eviware.soapui.*

class ScriptLibrary {
    def context
    def testRunner
    def log

    def boolean setHeaderValues(userId, password) {
        try {
            loginTestStep = testRunner.testCase.testSuite.testCases["SetHeaderParameters"].testSteps["SetHeaderParametersJsonRequest"]
            def request = loginTestStep.getPropertyValue("Request").toString()
            def jsonReq = new JsonSlurper().parseText(request);
            def builder = new JsonBuilder(jsonReq)
            builder.content.userId = userId
            builder.content.password = password
            def jsonReqAsString = JsonOutput.toJson(jsonReq)
            loginTestStep.setPropertyValue("Request", jsonReqAsString)
            def contextJsonRequest = new WsdlTestRunContext(loginTestStep);
            loginTestStep.run(testRunner, contextJsonRequest)
            def response = loginTestStep.getPropertyValue("Response").toString()
            def jsonResponse = new JsonSlurper().parseText(response);
            def accessTokenFromResponse = jsonResponse.accessToken.toString()
            def userPermissionFromResponse = jsonResponse.userPermissionIds.toString()
            def userIdFromResponse = jsonResponse.userId.toString()
            testRunner.testCase.testSuite.project.setPropertyValue("HEADER_USER_ID", userIdFromResponse)
            testRunner.testCase.testSuite.project.setPropertyValue("HEADER_USER_PERMISSION", userPermissionFromResponse)
            testRunner.testCase.testSuite.project.setPropertyValue("HEADER_ACCESS_TOKEN", accessTokenFromResponse)
            log.info "Header set with values " + userIdFromResponse + ":::" + userPermissionFromResponse + ":::" + accessTokenFromResponse
            setHeader = true
            return setHeader
        }
        catch (Exception ex) {
            log.info "Header Not Set " + ex
            setHeader = false
            testRunner.testCase.testSuite.project.setPropertyValue("HEADER_USER_ID", "")
            testRunner.testCase.testSuite.project.setPropertyValue("HEADER_USER_PERMISSION", "")
            testRunner.testCase.testSuite.project.setPropertyValue("HEADER_ACCESS_TOKEN", "")
            return setHeader
        }
    }
}
I get the following compilation error when trying to compile the Groovy script from the command prompt:
C:\Path\apache-groovy-binary-2.5.1\groovy-2.5.1\bin\GroovyScripts>groovy ScriptLibrary.groovy
org.codehaus.groovy.control.MultipleCompilationErrorsException: startup failed:
C:\Path\apache-groovy-binary-2.5.1\groovy-2.5.1\bin\GroovyScripts\ScriptLibrary.groovy: 20: unable to resolve class WsdlTestRunContext
@ line 20, column 26.
def contextJsonRequest = new WsdlTestRunContext(loginTestStep);
^
1 error
You have to include the soapui-{version}.jar file in the classpath to resolve this dependency issue. You can find it in the SoapUI-{version}/bin folder. Let's say that SoapUI 5.4.0 is located in /tmp/SoapUI-5.4.0/. In this case I can compile a script with the following command:
groovyc -d classes -cp ".:/tmp/SoapUI-5.4.0/bin/soapui-5.4.0.jar" script.groovy
Keep in mind that this command is run from the folder where script.groovy is located, and the compiled class can be found in ./classes/script.class
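As a follow-up, once the compiled classes are visible on SoapUI's Groovy classpath (how you wire that up depends on your SoapUI setup), a Groovy test step could use the class roughly like this; a sketch, assuming the class and property names from the question, with placeholder credentials:
// hand the library the SoapUI objects it expects via Groovy's map constructor
def lib = new ScriptLibrary(context: context, testRunner: testRunner, log: log)
def headerSet = lib.setHeaderValues("someUser", "somePassword")   // placeholder values
log.info "Headers set: " + headerSet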

groovy executing shell commands on remote server

I have an issue with executing shell commands on a remote server.
I'm trying various solutions and I have one working, but it is not optimal in terms of maintenance: I use a batch file that launches PuTTY, which connects to the remote server and sends the command.
i.e. in Groovy:
def batchFile = "C:\\Validation\\Tests_Auto\\Scripts\\remote_process\\setOldDate.bat"
Runtime.runtime.exec(batchFile)
and in my batch file :
c:
cd C:\Validation\Tests_Auto\Scripts\remote_process\
putty.exe -ssh root@xx.xx.xx.xx -pw **** -m "C:\Validation\Tests_Auto\Scripts\remote_process\setOldDate.txt"
setOldDate.txt contains the command date -s @1522018800
This works. However, I'd like to launch it in a cleaner way, either avoiding the use of a text file for the command or, better, avoiding using PuTTY.
I have tried several other ways to do the same thing but they don't work. I think I'm not too far off, but I need a little help.
I tried to launch a direct command via ssh:
Runtime.getRuntime().exec('"c:\\Program Files\\OpenSSH\\bin\\ssh.exe" root:****@xx.xx.xx.xx date -s @1522018800')
I'd be grateful if anyone could help. Thanks.
@Grab(group='com.jcraft', module='jsch', version='0.1.54')
def ant = new AntBuilder()
ant.sshexec( host:"somehost", username:"dude", password:"yo", command:"touch somefile" )
For other sshexec and scp task parameters, see the docs:
https://ant.apache.org/manual/Tasks/sshexec.html
https://ant.apache.org/manual/Tasks/scp.html
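For completeness, the scp task can be driven from AntBuilder in the same shape; a sketch with placeholder host, credentials and paths:
def ant = new AntBuilder()
// copy a local file to the remote host over SSH; trust:true skips known_hosts checking
ant.scp(file: "localfile.txt", todir: "dude:yo@somehost:/tmp", trust: true)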
For SoapUI:
This method uses Apache Ant + jsch-0.1.54.jar; it's the only way I know of for SoapUI.
Download the following libraries and put them into the soapui\bin\endorsed directory (create the endorsed directory):
https://central.maven.org/maven2/org/apache/ant/ant/1.9.11/ant-1.9.11.jar
https://central.maven.org/maven2/org/apache/ant/ant-launcher/1.9.11/ant-launcher-1.9.11.jar
https://central.maven.org/maven2/com/jcraft/jsch/0.1.54/jsch-0.1.54.jar
Edit soapui\bin\soapui.bat and add the following line where the other JAVA_OPTS are defined:
set JAVA_OPTS=%JAVA_OPTS% -Djava.endorsed.dirs="%SOAPUI_HOME%endorsed"
That's because the Ant libs must be loaded before Groovy.
Then the code above should work in SoapUI (except the @Grab line).
Alternatively, you can download only jsch-XXX.jar into the existing soapui\bin\ext directory and use the JSch library directly from Groovy.
See the examples: http://www.jcraft.com/jsch/examples/
or search for Groovy JSch examples.
Finally, compiling my various research and struggling to fit my environment constraints (Groovy in SoapUI), I ended up with the following solution that works for me:
download jsch-0.1.54.jar and put it in C:\Program Files\SmartBear\ReadyAPI-2.3.0\bin\ext
use the following Groovy script:
import java.util.Properties
import com.jcraft.jsch.ChannelExec
import com.jcraft.jsch.JSch
import com.jcraft.jsch.Session

def ip = context.expand( '${#Project#projectEndpoint}' )

try
{
    JSch jsch = new JSch();
    Session session = jsch.getSession("root", "$ip", 22);
    session.setPassword("****");
    // Avoid asking for key confirmation
    Properties prop = new Properties();
    prop.put("StrictHostKeyChecking", "no");
    session.setConfig(prop);
    session.connect();
    // SSH Channel
    ChannelExec channelssh = (ChannelExec) session.openChannel("exec");
    // Execute command
    //channelssh.setCommand("date -s @1520018000"); // change date
    channelssh.setCommand("ntpdate -u pool.ntp.org"); // restore date
    channelssh.connect();
    channelssh.disconnect();
}
catch (Exception e)
{
    log.info "exception : " + e
    System.out.println(e.getMessage());
}
finally
{
    session.disconnect();
}
UPGRADE
Here is a generalization I've made as my needs evolved. The following script, still using JSch, allows sending any command.
It performs proper host key checking, which eliminates the hazards of disabling it.
The user and password are passed as parameters.
import java.util.Properties
import com.jcraft.jsch.ChannelExec
import com.jcraft.jsch.JSch
import com.jcraft.jsch.Session
import java.util.regex.Pattern

def ip = context.expand( '${get endpoint#endpoint}' )
ip = ip.replaceFirst("http[s]?://", "")
def user = context.expand( '${#Project#ssh_user}' )
def password = context.expand( '${#Project#ssh_password}' )
def command = context.expand( '${#TestCase#command}' )
def timeout = context.expand( '${#TestCase#timeout_ms}' )
if (timeout == "")
    timeout = 1000 // default timeout 1s
else
    timeout = timeout.toInteger()
log.info "command = " + command

Session session
try
{
    JSch jsch = new JSch();
    session = jsch.getSession(user, ip, 22);
    session.setPassword(password);
    //log.info "user : $user"
    //log.info "set password : $password"
    //log.info System.getProperty("user.home")+"/.ssh/known_hosts"
    jsch.setKnownHosts(System.getProperty("user.home") + "/.ssh/known_hosts");
    session.connect();
    //log.info "session connect"
    // SSH Channel
    ChannelExec channelssh = (ChannelExec) session.openChannel("exec");
    // Execute command
    channelssh.setCommand(command);
    InputStream commandOutput = channelssh.getInputStream();
    channelssh.connect();
    int readByte = commandOutput.read();
    outputBuffer = [];
    // timeout to avoid infinite loop
    while ((readByte != -1) && (timeout > 0))
    {
        outputBuffer.add(readByte)
        readByte = commandOutput.read();
        timeout = timeout - 1
    }
    // process output
    outputBuffer = outputBuffer as byte[]
    // convert byte array into string
    output = new String(outputBuffer, "UTF-8")
    sleep(3000)
    //log.info "disconnect"
    channelssh.disconnect();
    testRunner.testCase.setPropertyValue("cmd_output", output)
}
catch (Exception e)
{
    msg = "exception : " + e
    log.error msg
    testRunner.fail(msg)
}
finally
{
    session.disconnect();
}
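For context, the script above is driven by test case properties; something like the following sketch (the property values are just examples) could set them from another Groovy step before it runs:
// "command" and "timeout_ms" are the properties the script expands; it writes its result to "cmd_output"
testRunner.testCase.setPropertyValue("command", "date -s @1522018800")
testRunner.testCase.setPropertyValue("timeout_ms", "2000")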

storing the value of linux command to a variable in groovy script (soapui)

I am executing the following Groovy script in my SoapUI assertion, where I am trying to store the output of a Linux command in a variable so that I can compare it with an expected value. My Groovy is as below:
import groovy.sql.Sql
import java.sql.DriverManager
import java.sql.Connection
import javax.sql.DataSource
import java.sql.Driver;
import java.util.*;
import java.text.*;
// Executing Script to capture logs
log.info "Executing Script to capture logs"
import java.lang.Object
import com.jcraft.jsch.JSch
import com.jcraft.jsch.Session
import com.jcraft.jsch.UserInfo
import com.jcraft.jsch.Channel
import com.jcraft.jsch.ChannelExec
import java.util.Properties
def sshHost = context.expand('${#Project#Kcom}');
def sshUser = 'sshuser'
def sshPass = 'sshpass'
def sshPort = port value
/////////////////////////////
log.info "Opening connection to ${sshUser}#${sshHost}:${sshPort}"
Properties config = new Properties()
config.put("StrictHostKeyChecking", "no")
JSch jsch = new JSch()
///////////////////////////
Session s = jsch.getSession(sshUser, sshHost, sshPort)
s.setPassword(sshPass)
s.setConfig(config)
s.connect()
log.info "Connected"
Channel c = s.openChannel("exec");
ChannelExec ce = (ChannelExec) c;
def logerror = ce.setCommand("tail -50 <log file> | grep -i 'some string'");
log.info logerror
ce.setErrStream(System.err);
ce.connect();
BufferedReader reader = new BufferedReader(new InputStreamReader(ce.getInputStream()));
String line;
while ((line = reader.readLine()) != null) {
    log.info(line);
}
Here logerror always returns null.
How can I do this?
I don't know what's wrong with your code; I just know that the Ant sshexec command does what you need,
so you can see how to do it correctly from its sources.
PS: I know that SoapUI includes a limited groovy-all.jar, but if you are able to add the full Groovy library list to the SoapUI classpath, it is possible to use groovy.lang.AntBuilder:
def ant = new AntBuilder()
ant.sshexec(
    host: "somehost",
    username: "dude",
    password: "yo",
    command: "tail -50 <log file> | grep -i 'some string'",
    output: "path/to/local/output/file"
)
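As a follow-up, the file written via the output attribute can then be read back into a variable for the comparison; a small sketch in which the path and expected string are the placeholders from above:
// read the captured command output and compare it with the expected value
def cmdOutput = new File("path/to/local/output/file").getText("UTF-8").trim()
log.info "command output: " + cmdOutput
assert cmdOutput.contains("some string")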

groovy script classpath

I'm writing a script in Groovy and I would like someone to be able to execute it simply by running ./myscript.groovy. However, this script requires a 3rd party library (MySQL JDBC), and I don't know of any way to provide this to the script other than via a -classpath or -cp argument, e.g.
`./monitor-vouchers.groovy -cp /path/to/mysql-lib.jar`
For reasons I won't go into here, it's not actually possible to provide the JAR location to the script using the -classpath/-cp argument. Is there some way that I can load the JAR from within the script itself? I tried using @Grab:
import groovy.sql.Sql

@Grab(group='mysql', module='mysql-connector-java', version='5.1.19')
def getConnection() {
    def dbUrl = 'jdbc:mysql://database1.c5vveqm7rqgx.eu-west-1.rds.amazonaws.com:3306/vouchers_prod'
    def dbUser = 'pucaroot'
    def dbPassword = 'password'
    def driverClass = "com.mysql.jdbc.Driver"
    return Sql.newInstance(dbUrl, dbUser, dbPassword, driverClass)
}

getConnection().class
But this causes the following error:
Caught: java.sql.SQLException: No suitable driver
java.sql.SQLException: No suitable driver
at monitor-vouchers.getConnection(monitor-vouchers.groovy:13)
at monitor-vouchers.run(monitor-vouchers.groovy:17)
Is there a way I can execute this script using just ./monitor-vouchers.groovy?
You should be able to do the following; the @GrabConfig(systemClassLoader=true) line is the important difference, since JDBC's DriverManager only sees drivers loaded by the system class loader:
import groovy.sql.Sql

@GrabConfig(systemClassLoader=true)
@Grab('mysql:mysql-connector-java:5.1.19')
def getConnection() {
    def dbUrl = 'jdbc:mysql://database1.c5vveqm7rqgx.eu-west-1.rds.amazonaws.com:3306/vouchers_prod'
    def dbUser = 'pucaroot'
    def dbPassword = 'bigsecret'
    def driverClass = "com.mysql.jdbc.Driver"
    return Sql.newInstance(dbUrl, dbUser, dbPassword, driverClass)
}

getConnection().class
Two more options:
Put the jar in ${user.home}/.groovy/lib
If the jar is in a known location, use this code to load it into the current class loader (the jar path below is a placeholder):
this.class.classLoader.rootLoader.addURL(new URL('file:///path/to/mysql-lib.jar'))
