Download a zip file using Groovy

I need to download a zip file from a URL using Groovy.
Test url: https://gist.github.com/daicham/5ac8461b8b49385244aa0977638c3420/archive/17a929502e6dda24d0ecfd5bb816c78a2bd5a088.zip
What I've done so far:
def static downloadArtifacts(url, filename) {
    new URL(url).openConnection().with { conn ->
        conn.setRequestProperty("PRIVATE-TOKEN", "xxxx")
        url = conn.getHeaderField("Location")
        if (!url) {
            new File((String) filename).withOutputStream { out ->
                conn.inputStream.with { inp ->
                    out << inp
                    inp.close()
                }
            }
        }
    }
}
But when I open the downloaded zip file I get the error "An error occurred while loading the archive".
Any help is appreciated.
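A quick way to narrow this down is to check the HTTP status and content type before writing the stream to disk; saving a redirect or an error page is a common cause of an archive that will not open. A minimal sketch, assuming the test URL above and no authentication:

def downloadZip(String url, String filename) {
    def conn = new URL(url).openConnection()
    conn.connect()
    assert conn.responseCode == 200 : "Unexpected HTTP status ${conn.responseCode}"
    println "Content-Type: ${conn.contentType}"   // text/html here would explain a corrupt archive
    new File(filename).withOutputStream { out ->
        conn.inputStream.withStream { inp ->
            out << inp
        }
    }
}

downloadZip('https://gist.github.com/daicham/5ac8461b8b49385244aa0977638c3420/archive/17a929502e6dda24d0ecfd5bb816c78a2bd5a088.zip', 'a.zip')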

The simplest option is to let Groovy copy the URL's bytes straight into a file:
URL url2download = new URL(url)
File file = new File(filename)
file.bytes = url2download.bytes

You can do it with HttpBuilder-NG:
// https://http-builder-ng.github.io/http-builder-ng/
@Grab('io.github.http-builder-ng:http-builder-ng-core:1.0.3')
import groovyx.net.http.HttpBuilder
import groovyx.net.http.optional.Download

def target = 'https://gist.github.com/daicham/5ac8461b8b49385244aa0977638c3420/archive/17a929502e6dda24d0ecfd5bb816c78a2bd5a088.zip'
File file = HttpBuilder.configure {
    request.uri = target
}.get {
    Download.toFile(delegate, new File('a.zip'))
}
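Whichever approach is used, a quick sanity check that the saved file really is a readable archive (plain JDK ZipFile, assuming the a.zip name from the example above):

import java.util.zip.ZipFile

new ZipFile('a.zip').withCloseable { zip ->
    zip.entries().each { entry -> println entry.name }
}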

You can also do it from a Grails controller, streaming a zip that is built on the fly:
import java.util.zip.ZipEntry
import java.util.zip.ZipOutputStream

class SampleZipController {

    def index() { }

    def downloadSampleZip() {
        response.setContentType('APPLICATION/OCTET-STREAM')
        response.setHeader('Content-Disposition', 'Attachment;Filename="example.zip"')
        ZipOutputStream zip = new ZipOutputStream(response.outputStream)
        def file1Entry = new ZipEntry('first_file.txt')
        zip.putNextEntry(file1Entry)
        zip.write("This is the content of the first file".bytes)
        def file2Entry = new ZipEntry('second_file.txt')
        zip.putNextEntry(file2Entry)
        zip.write("This is the content of the second file".bytes)
        zip.close()
    }
}

Related

How to move a component in Sonatype Nexus 3?

I'm trying to promote a component together with all its assets from our staging repo to the released repo. So far I've managed to create the component so that it gets detected when trying to recreate it, but it won't show up in searches or browsing. I'm pretty sure I need to emit an event for it to show up, but even after rebuilding the indexes it won't appear, so I must have missed something else as well.
import org.sonatype.nexus.repository.storage.Asset
import org.sonatype.nexus.repository.storage.Query
import org.sonatype.nexus.repository.storage.Component
import org.sonatype.nexus.repository.storage.StorageFacet
import groovy.json.JsonOutput
import groovy.json.JsonSlurper

def request = new JsonSlurper().parseText(args)
assert request.fromRepoName: 'fromRepo parameter is required'
assert request.toRepoName: 'toRepo parameter is required'
assert request.artifactId: 'artifactId parameter is required'
assert request.groupId: 'groupId parameter is required'
assert request.version: 'version parameter is required'

log.info("Moving ${request.groupId}:${request.artifactId}:${request.version} from ${request.fromRepoName} to ${request.toRepoName}")

def fromRepo = repository.repositoryManager.get(request.fromRepoName)
def toRepo = repository.repositoryManager.get(request.toRepoName)
StorageFacet fromStorageFacet = fromRepo.facet(StorageFacet)
StorageFacet toStorageFacet = toRepo.facet(StorageFacet)
def fromTx = fromStorageFacet.txSupplier().get()
def toTx = toStorageFacet.txSupplier().get()

try {
    fromTx.begin()
    log.info("Transaction started for repo: ${request.fromRepoName}")
    Iterable<Component> components = fromTx.findComponents(
            Query.builder().where('name == ').param(request.artifactId)
                    .and('group == ').param(request.groupId)
                    .and('version == ').param(request.version)
                    .build(), [fromRepo])
    log.info("Trying to get component.")
    component = components.iterator().next()
    try {
        toTx.begin()
        log.info("Transaction started for repo: ${request.toRepoName}")
        if (toTx.componentExists(component.group(), component.name(), component.version(), toRepo)) {
            log.info("Component already exists, deleting.")
            Iterable<Component> componentsToDelete = toTx.findComponents(
                    Query.builder().where('name == ').param(request.artifactId)
                            .and('group == ').param(request.groupId)
                            .and('version == ').param(request.version)
                            .build(), [toRepo])
            log.info("Trying to get component.")
            componentToDelete = componentsToDelete.iterator().next()
            toTx.deleteComponent(componentToDelete)
        }
        log.info("Creating new component with name: ${component.name()}, group: ${component.group()}, version: ${component.version()} in repo ${request.toRepoName}")
        newComponent = toTx.createComponent(toTx.findBucket(toRepo), toRepo.getFormat())
                .name(component.name())
                .group(component.group())
                .version(component.version())
                .attributes(component.attributes())
        newComponent.setEntityMetadata(component.getEntityMetadata())
        log.info("Component metadata: ${component.getEntityMetadata()}")
        log.info("New component successfully created.")
        fromTx.browseAssets(component).each { Asset asset ->
            log.info("Attaching ${asset.name()} to new component.")
            log.info("Asset metadata: ${asset}")
            newAsset = toTx.createAsset(toTx.findBucket(toRepo), newComponent)
                    .name(asset.name())
                    .attributes(asset.attributes())
            newAsset.setEntityMetadata(asset.getEntityMetadata())
            log.info("Asset entityMetadata: ${asset.getEntityMetadata()}")
            log.info("New asset metadata: ${newAsset}")
            log.info("Trying to save asset.")
            toTx.saveAsset(newAsset)
        }
        toTx.saveComponent(newComponent)
        toTx.commit()
        log.info("Transaction committed for repo: ${request.toRepoName}")
    } finally {
        toTx.close()
    }
    log.info("Deleting component with name: ${component.name()}, group: ${component.group()}, version: ${component.version()} in repo ${request.fromRepoName}")
    fromTx.deleteComponent(component)
    fromTx.commit()
    log.info("Transaction committed for repo: ${request.fromRepoName}")
} finally {
    fromTx.close()
}

def result = JsonOutput.toJson([
        status: "Success"
])
return result
For added context, I'm trying to set up a CI/CD flow with Jenkins Pipelines & Maven based on this: https://www.cloudbees.com/blog/new-way-do-continuous-delivery-maven-and-jenkins-pipeline
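For reference, the args payload that satisfies the asserts in the script above would look roughly like this (the repository names and coordinates below are made-up examples):

def args = new groovy.json.JsonBuilder([
    fromRepoName: 'staging',       // hypothetical repository names
    toRepoName  : 'released',
    groupId     : 'com.example',   // hypothetical coordinates
    artifactId  : 'my-app',
    version     : '1.0.0'
]).toString()

This is the JSON string the script parses with JsonSlurper at the top.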
Not sure if you solved this already, but it seems I'm trying to achieve something very similar.
I've found that if I add the assets into the destination repository using the MavenFacet or RawContentFacet (depending on the underlying repository format) and use the put method, then the uploaded assets do show up in browse and search.
This is the code I have come up with:
import org.sonatype.nexus.repository.storage.StorageFacet
import org.sonatype.nexus.repository.transaction.TransactionalStoreBlob
import org.sonatype.nexus.repository.view.payloads.BlobPayload
import org.sonatype.nexus.repository.storage.Component
import org.sonatype.nexus.repository.storage.Query
import org.sonatype.nexus.repository.maven.MavenFacet
import org.sonatype.nexus.repository.maven.internal.Maven2Format
import org.sonatype.nexus.repository.raw.RawContentFacet
import org.sonatype.nexus.repository.raw.internal.RawFormat
import org.sonatype.nexus.repository.storage.Asset
import org.sonatype.nexus.blobstore.api.BlobStoreManager
import org.sonatype.nexus.repository.manager.RepositoryManager

RepositoryManager rManager = repository.repositoryManager
BlobStoreManager bManager = blobStore.blobStoreManager
def STAGING = 'staging'
def input = new groovy.json.JsonSlurper().parseText(args)

for (item in input.items) {
    assert item.to: 'item.to parameter is required'
    assert item.group: 'item.group parameter is required'
    assert item.artifact: 'item.artifact parameter is required'
    assert item.version: 'item.version parameter is required'
    assert item.to != STAGING: 'item.to cannot be the staging area'
    // Check the repository exists
    def toRepo = repository.repositoryManager.get(item.to)
    assert toRepo: 'item.to is not a valid repository name'
    // Currently only support these payload types
    assert [Maven2Format.NAME, RawFormat.NAME].contains(toRepo.getFormat().toString()): 'Unknown Format ' + toRepo.getFormat().toString()
}

def repo = rManager.get(STAGING)
def tx = repo.facet(StorageFacet).txSupplier().get()
def deNest = new ArrayList()
try {
    tx.begin()
    for (item in input.items) {
        Iterable<Component> components = tx.findComponents(
                Query.builder().where('name == ').param(item.artifact).and('group == ').param(item.group)
                        .and('version == ').param(item.version).build(), [repo])
        Component component = components.iterator().next()
        tx.browseAssets(component).each { Asset asset ->
            def br = asset.requireBlobRef()
            deNest.add([
                    to         : item.to,
                    store      : br.getStore(),
                    blobId     : br.getBlobId(),
                    asset      : asset.name(),
                    contentType: asset.contentType()
            ])
        }
    }
    tx.commit()
} finally {
    tx.close()
}

for (dn in deNest) {
    def toRepo = rManager.get(dn.to)
    def payload = new BlobPayload(bManager.get(dn.store).get(dn.blobId), dn.contentType)
    TransactionalStoreBlob.operation.withDb(toRepo.facet(StorageFacet).txSupplier()).call {
        switch (toRepo.getFormat().toString()) {
            case Maven2Format.NAME:
                def toTx = toRepo.facet(MavenFacet)
                toTx.put(toTx.mavenPathParser.parsePath(dn.asset), payload)
                break
            case RawFormat.NAME:
                def toTx = toRepo.facet(RawContentFacet)
                toTx.put(dn.asset, payload)
                break
            default:
                throw new RuntimeException('Unknown Format ' + toRepo.getFormat().toString())
        }
    }
}

return groovy.json.JsonOutput.toJson([success: true])
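Again for reference only, an args payload matching this script's asserts might look like this (the item values are made up):

def args = new groovy.json.JsonBuilder([
    items: [
        [to: 'maven-releases', group: 'com.example', artifact: 'my-app', version: '1.0.0']  // hypothetical values
    ]
]).toString()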
The script below works for NPM repositories within a single blob store; I haven't tried it for other cases. Nexus 3.43.0-01.
import groovy.json.JsonOutput
import groovy.json.JsonSlurper
import org.joda.time.DateTime
import org.sonatype.nexus.repository.Format
import org.sonatype.nexus.repository.manager.RepositoryManager
import org.sonatype.nexus.repository.storage.Asset
import org.sonatype.nexus.repository.storage.Component
import org.sonatype.nexus.repository.storage.Query
import org.sonatype.nexus.repository.storage.StorageFacet

def request = new JsonSlurper().parseText(args)
assert request.srcRepoName: 'srcRepoName parameter is required'
assert request.dstRepoName: 'dstRepoName parameter is required'

def resultCode = 0
def errMessage = ''
log.info('Try to sync assets from ' + request.srcRepoName + ' to ' + request.dstRepoName)

RepositoryManager repositoryManager = repository.repositoryManager
def srcRepo = repositoryManager.get(request.srcRepoName)
StorageFacet srcStorageFacet = srcRepo.facet(StorageFacet)
def srcTx = srcStorageFacet.txSupplier().get()

class SrcFormat extends Format {
    SrcFormat(String format) {
        super(format)
    }
}

try {
    srcTx.begin()
    log.info('syncRepos. Try to find source components')
    Iterable<Component> srcComponents = srcTx.findComponents(Query.builder().where('1').eq(1).build(), [srcRepo])
    log.info('syncRepos. There are ' + srcComponents.size() + ' source components found')
    srcTx.commit()
    if (srcComponents != null && srcComponents.size() > 0) {
        log.info('syncRepos. Try to create destination components and assets')
        def dstRepo = repositoryManager.get(request.dstRepoName)
        srcComponents.each { srcComponent ->
            srcTx.begin()
            Component dstComponent = null
            if (srcTx.componentExists(srcComponent.group(), srcComponent.name(), srcComponent.version(), dstRepo)) {
                dstComponent = srcTx.findComponents(Query.builder().where('name').eq(srcComponent.name()).build(), [srcRepo])[0]
            } else {
                dstComponent = srcTx.createComponent(srcTx.findBucket(dstRepo), new SrcFormat(srcComponent.format()))
                        .name(srcComponent.name())
                        .group(srcComponent.group())
                        .version(srcComponent.version())
                        .attributes(srcComponent.attributes())
                dstComponent.format(srcComponent.format())
                srcTx.saveComponent(dstComponent)
            }
            srcTx.browseAssets(srcComponent).each { srcAsset ->
                if (!srcTx.assetExists(srcAsset.name(), dstRepo)) {
                    def newAsset = srcTx.createAsset(srcTx.findBucket(dstRepo), dstComponent)
                            .name(srcAsset.name())
                            .attributes(srcAsset.attributes())
                            .blobRef(srcAsset.requireBlobRef())
                    newAsset.size(srcAsset.size())
                    newAsset.contentType(srcAsset.contentType())
                    newAsset.lastDownloaded(DateTime.now())
                    srcTx.saveAsset(newAsset)
                }
            }
            srcTx.commit()
        }
        srcTx.begin()
        log.info('syncRepos. Try to find source assets by application/json contentType')
        Iterable<Asset> srcAssets = srcTx.findAssets(Query.builder().where('content_type').eq('application/json').build(), [srcRepo])
        log.info('syncRepos. There are ' + srcAssets.size() + ' source assets found')
        srcAssets.each { srcAsset ->
            if (!srcTx.assetExists(srcAsset.name(), dstRepo)) {
                def newAsset = srcTx.createAsset(srcTx.findBucket(dstRepo), new SrcFormat(srcAsset.format()))
                        .name(srcAsset.name())
                        .attributes(srcAsset.attributes())
                        .blobRef(srcAsset.requireBlobRef())
                newAsset.size(srcAsset.size())
                newAsset.contentType(srcAsset.contentType())
                newAsset.lastDownloaded(DateTime.now())
                srcTx.saveAsset(newAsset)
            }
        }
        srcTx.commit()
    }
} catch (Exception e) {
    log.error('syncRepos. Exception when sync repos: ' + e.message)
    srcTx.rollback()
    resultCode = -1
    errMessage = e.message
} finally {
    srcTx.close()
}

log.info('syncRepos. Try to return result')
def result = JsonOutput.toJson([
        srcRepoName: request.srcRepoName,
        dstRepoName: request.dstRepoName,
        resultCode : resultCode,
        errMessage : errMessage
])
return result

Groovy grep words in file

I want to grep for words in files under a path. What is the Groovy way to do it? And how can I count how many matching words I find in each file?
import groovy.io.FileType

def splitStatements() {
    String path = "C:\\Users\\John\\test"
    def result = new AntBuilder().fileset(dir: path) {
        containsregexp expression: ['END', 'BEGIN']
    }*.file
    println result
}
splitStatements()
This does what I want:
import groovy.io.FileType

def wordCount_END = 0
def wordCount_BEGIN = 0
def dir = new File("C:\\Users\\John\\test")
dir.eachFileRecurse(FileType.FILES) { file ->
    Scanner s = new Scanner(file)
    while (s.hasNext()) {
        if (s.next().equals('BEGIN')) wordCount_BEGIN++
    }
}
dir.eachFileRecurse(FileType.FILES) { file ->
    Scanner s = new Scanner(file)
    while (s.hasNext()) {
        if (s.next().equals('END')) wordCount_END++
    }
}
println("END count per lock: " + wordCount_END)
println("BEGIN count per lock: " + wordCount_BEGIN)
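For the counting part of the question, a shorter and more idiomatic variant is to split each file's text on whitespace and count the matches per file (same directory as above):

import groovy.io.FileType

def dir = new File('C:\\Users\\John\\test')
dir.eachFileRecurse(FileType.FILES) { file ->
    def words = file.text.split(/\s+/)
    println "${file.name}: BEGIN=${words.count('BEGIN')}, END=${words.count('END')}"
}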

Groovy Half-mock with MockFor

I want to test the following class:
public class DBSync {

    public dbNotify(String path) {
        if (!path) {
            return
        }
        def pathIndex = path.lastIndexOf(File.separator)
        if (pathIndex > 0) {
            def folder = path[0..pathIndex - 1]
            def fileName = path[pathIndex + 1..path.length() - 1]
            println "Syncing from directory $folder for filename $fileName"
            if (fileName.contains(EXCLUDE_FILE_PATTERN)) {
                println "Filename is $EXCLUDE_FILE_PATTERN skipping db write "
                return
            }
            writeToDB(folder, fileName)
        }
    }

    public writeToDB(folder, file) {
        // inserting to database
    }
}
The test class is:
import groovy.mock.interceptor.MockFor
import org.junit.Test

public class TestDBSync {

    @Test
    public void test() {
        def mockDBSyncContext = new MockFor(DBSync)
        def file = "file.zip"
        def folder = "data"
        def path = folder + File.separator + file
        def called = false
        // Expect at least one call
        mockDBSyncContext.demand.writeToDB(1..1) { String folderargs, String fileargs -> called = true }
        mockDBSyncContext.demand.dbNodify(1..1) { String pathargs -> return }
        // Obtaining a usable mock instance
        def mockDBSyncProxy = mockDBSyncContext.proxyInstance()
        // Fake calling the method
        mockDBSyncContext.use {
            mockDBSyncProxy.dbNotify(path)
        }
        // Verify invoked at least once?
        mockDBSyncContext.verify(mockDBSyncProxy)
    }
}
The test is failing and I am getting the following error:
junit.framework.AssertionFailedError: No call to 'dbNotify' expected
at this point. Still 1 call(s) to 'writeToDB' expected.
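For what it's worth, MockFor verifies demands strictly in the order they were declared, and calls on the proxy returned by proxyInstance() never reach the real method bodies, so the real dbNotify (and therefore writeToDB) is never executed here. If the intent is a half-mock, i.e. run the real dbNotify but fake out writeToDB, one sketch is a per-instance metaClass override (assuming the DBSync class above, with EXCLUDE_FILE_PATTERN defined):

def dbSync = new DBSync()
def called = false
// Replace only writeToDB on this instance; dbNotify keeps its real implementation.
dbSync.metaClass.writeToDB = { folder, file -> called = true }
dbSync.dbNotify('data' + File.separator + 'file.zip')
assert called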

Modifying the file contents of a zipfile entry

I would like to update the contents of a text file located inside a zip file.
I cannot find out how to do this, and the code below is not working properly.
Many thanks for any help!
import java.util.zip.ZipFile
import java.util.zip.ZipEntry
import java.util.zip.ZipOutputStream

String zipFileFullPath = "C:/path/to/myzipfile/test.zip"
ZipFile zipFile = new ZipFile(zipFileFullPath)
ZipEntry entry = zipFile.getEntry("someFile.txt")
if (entry) {
    InputStream input = zipFile.getInputStream(entry)
    BufferedReader br = new BufferedReader(new InputStreamReader(input, "UTF-8"))
    String s = null
    StringBuffer sb = new StringBuffer()
    while ((s = br.readLine()) != null) {
        sb.append(s)
    }
    sb.append("adding some text..")
    ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFileFullPath))
    out.putNextEntry(new ZipEntry("someFile.txt"))
    int length
    InputStream fin = new ByteArrayInputStream(sb.toString().getBytes("UTF8"))
    while ((length = fin.read(sb)) > 0) {
        out.write(sb, 0, length)
    }
    out.closeEntry()
}
Just some slight modifications to @Opal's answer: I used Groovy methods where possible and packaged it all in a method.
Groovy Snippet
import java.util.zip.*

void updateZipEntry(String zipFile, String zipEntry, String newContent) {
    def zin = new ZipFile(zipFile)
    def tmp = File.createTempFile("temp_${System.nanoTime()}", '.zip')
    tmp.withOutputStream { os ->
        def zos = new ZipOutputStream(os)
        zin.entries().each { entry ->
            def isReplaced = entry.name == zipEntry
            zos.putNextEntry(isReplaced ? new ZipEntry(zipEntry) : entry)
            zos << (isReplaced ? newContent.getBytes('UTF8') : zin.getInputStream(entry).bytes)
            zos.closeEntry()
        }
        zos.close()
    }
    zin.close()
    assert new File(zipFile).delete()
    tmp.renameTo(zipFile)
}
Usage
updateZipEntry('/tmp/file.zip', 'META-INF/web.xml', '<foobar>new content!</foobar>')
What exactly isn't working? Is there any exception thrown?
As far as I know it's not possible to modify a zip file in situ. The following script rewrites the file and, when the desired entry is processed, modifies it.
import java.util.zip.*

def zipIn = new File('lol.zip')
def zip = new ZipFile(zipIn)
def zipTemp = File.createTempFile('out', 'zip')
zipTemp.deleteOnExit()
def zos = new ZipOutputStream(new FileOutputStream(zipTemp))
def toModify = 'lol.txt'

for (e in zip.entries()) {
    if (!e.name.equalsIgnoreCase(toModify)) {
        zos.putNextEntry(e)
        zos << zip.getInputStream(e).bytes
    } else {
        zos.putNextEntry(new ZipEntry(toModify))
        zos << 'lollol\n'.bytes
    }
    zos.closeEntry()
}
zos.close()
zipIn.delete()
zipTemp.renameTo(zipIn)
UPDATE
I wasn't right: it is possible to modify a zip file in situ, but your solution will omit the other files that were zipped. The output file will contain only a single file, the one you wanted to modify. I also suppose that your file was corrupted because close() was never invoked on out.
Below is your script, slightly modified (more Groovy):
import java.util.zip.*

def zipFileFullPath = 'lol.zip'
def zipFile = new ZipFile(zipFileFullPath)
def entry = zipFile.getEntry('lol.txt')
if (entry) {
    def input = zipFile.getInputStream(entry)
    def br = new BufferedReader(new InputStreamReader(input, 'UTF-8'))
    def sb = new StringBuffer()
    sb << br.text
    sb << 'adding some text..'
    def out = new ZipOutputStream(new FileOutputStream(zipFileFullPath))
    out.putNextEntry(new ZipEntry('lol.txt'))
    out << sb.toString().getBytes('UTF8')
    out.closeEntry()
    out.close()
}
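To confirm the change took effect, the entry can be read back after running either script (same lol.zip / lol.txt names as above):

def check = new java.util.zip.ZipFile('lol.zip')
println check.getInputStream(check.getEntry('lol.txt')).text
check.close()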

How to read Nutch-generated content data in the segment folder using Java

I am trying to read the content data inside the segment folder. I think the content data file is written in a custom format.
I experimented with Nutch's Content class, but it does not recognize the format.
import java.io.IOException;

import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.NutchConfiguration;

public class ContentReader {
    public static void main(String[] args) throws IOException {
        // Setup the parser
        Configuration conf = NutchConfiguration.create();
        Options opts = new Options();
        GenericOptionsParser parser = new GenericOptionsParser(conf, opts, args);
        String[] remainingArgs = parser.getRemainingArgs();
        FileSystem fs = FileSystem.get(conf);
        String segment = remainingArgs[0];
        Path file = new Path(segment, Content.DIR_NAME + "/part-00000/data");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        Text key = new Text();
        Content content = new Content();
        // Loop through sequence files
        while (reader.next(key, content)) {
            try {
                System.out.write(content.getContent(), 0, content.getContent().length);
            } catch (Exception e) {
            }
        }
    }
}
org.apache.nutch.segment.SegmentReader has a map-reduce implementation that reads content data in the segment directory; it is the class behind the bin/nutch readseg command.
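If a one-off dump is enough, the same class can also be driven through its main method. A rough Groovy sketch (the segment and output paths below are placeholders, and the Nutch/Hadoop jars must be on the classpath):

import org.apache.nutch.segment.SegmentReader

// Equivalent to: bin/nutch readseg -dump <segment_dir> <output_dir>
SegmentReader.main(['-dump', 'crawl/segments/20230101120000', 'segment_dump'] as String[])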
Spark/Scala code to read data from the segment's content folder.
This is how I read from the content folder in my project.
I created a case class Page that holds the data read from the content folder:
case class Page(var url: String, var title: String = null,
                var contentType: String = null, var rawHtml: String = null, var language: String = null,
                var metadata: Map[String, String] = Map.empty)
Code to read from the content folder:
import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.io.{Text, Writable}
import org.apache.nutch.crawl.{CrawlDatum, Inlinks}
import org.apache.nutch.parse.ParseText
import org.apache.nutch.protocol.Content

import scala.util.Try

val contentDF = spark.sparkContext.sequenceFile(path.contentLocation, classOf[Text], classOf[Writable])
  .map { case (x, y) => (x.toString, extract(y.asInstanceOf[Content])) }

/** Converts a Content object to a Page. */
def extract(content: Content): Page = {
  try {
    val parsed = Page(content.getUrl)
    var charset: String = getCharsetFromContentType(content.getContentType)
    if (StringUtils.isBlank(charset)) {
      charset = "UTF-8"
    }
    parsed.rawHtml = Try(new String(content.getContent, charset)).getOrElse(new String(content.getContent, "UTF-8"))
    parsed.contentType = Try(content.getMetadata.get("Content-Type")).getOrElse("text/html")
    // parsed.isHomePage = Boolean.valueOf(content.getMetadata.get("isHomePage"))
    parsed.metadata = content.getMetadata.names().map(name => (name, content.getMetadata.get(name))).toMap
    Try {
      if (StringUtils.isNotBlank(content.getMetadata.get("Content-Language")))
        parsed.language = content.getMetadata.get("Content-Language")
      else if (StringUtils.isNotBlank(content.getMetadata.get("language")))
        parsed.language = content.getMetadata.get("language")
      else parsed.language = content.getMetadata.get("lang")
    }
    parsed
  } catch {
    case e: Exception =>
      LOG.error("ERROR while extracting data from Content ", e)
      null
  }
}
/** Extracts the charset from an HTML Content-Type header value. */
// Note: charsetPattern is not shown in the original snippet; the definition below is an assumed placeholder.
val charsetPattern = java.util.regex.Pattern.compile("charset=\\s*([^;\\s]+)", java.util.regex.Pattern.CASE_INSENSITIVE)

def getCharsetFromContentType(contentType: String): String = {
  var result: String = "UTF-8"
  Try {
    if (StringUtils.isNotBlank(contentType)) {
      val m = charsetPattern.matcher(contentType)
      result = if (m.find) m.group(1).trim.toUpperCase else "UTF-8"
    }
  }
  result
}
