KeyError from find method on XML etree child - python-3.x

I create a simple XML structure:
import xml.etree.cElementTree as ET
root = ET.Element("root")
doc = ET.SubElement(root, "doc", name="doc1")
ET.SubElement(doc, "rank").text = "Employee"
ET.SubElement(doc, "skill").text = "nothing"
Now I want to return the values, but I get this error: "KeyError: 'Employee'"
for doc in root.findall('doc'):
    rank = doc.find('rank').text
    skill = doc.find('skill').text
    name = doc.get('name')
    Logging(name, rank, skill)

The issue is with the parameters in the Logging method.
When I switch Logging to print, everything works.
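Since the Logging helper isn't shown in the question, here is a minimal sketch (my own, not from the post) of the same loop using the standard logging module, letting logging interpolate the extracted values:

import logging

logging.basicConfig(level=logging.INFO)

for doc in root.findall('doc'):
    rank = doc.find('rank').text
    skill = doc.find('skill').text
    name = doc.get('name')
    # Pass the values as separate arguments; logging formats them into the message.
    logging.info("name=%s rank=%s skill=%s", name, rank, skill)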

Related

Indexing and searching in documents using PyLucene

I would like to index some documents, then search them for specific terms and retrieve the positions of those terms in the documents. I have been unsuccessful so far, as all the examples are in Java and, more importantly, they use an older version of Lucene that is very different from the current one.
This is my snippet that creates the index:
import pandas as pd
import operator
import lucene
from java.io import StringReader
from java.io import File
from org.apache.lucene.analysis.en import EnglishAnalyzer
from org.apache.lucene.document import Document, Field, FieldType
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.index import DirectoryReader, PostingsEnum, IndexOptions, IndexWriter, IndexWriterConfig
from org.apache.lucene.store import FSDirectory, ByteBuffersDirectory
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.util import Version, BytesRefIterator
# Init
if not lucene.getVMEnv():
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
directory = ByteBuffersDirectory()
iconfig = IndexWriterConfig(EnglishAnalyzer())
iwriter = IndexWriter(directory, iconfig)
ft = FieldType()
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
ft.setStored(True)
ft.setTokenized(True)
ft.setStoreTermVectors(True)
ft.setStoreTermVectorOffsets(True)
ft.setStoreTermVectorPositions(True)
ts = ["this bernhard is the text to be index text",
      "this claudia is the text to be indexed"]
for t in ts:
    doc = Document()
    doc.add(Field("content", t, ft))
    iwriter.addDocument(doc)
iwriter.commit()
iwriter.close()
This is the part of the code where I try to read the index back in order to extract the position of a term:
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.search.similarities import BM25Similarity

analyzer = StandardAnalyzer()
reader = DirectoryReader.open(directory)
searcher = IndexSearcher(DirectoryReader.open(directory))
searcher.setSimilarity(BM25Similarity(1.2, 0.75))
query = QueryParser('content', analyzer).parse("world")
scoreDocs = searcher.search(query, 10).scoreDocs  # search() returns a TopDocs object containing scoreDocs and totalHits
## each scoreDoc object contains docId and score
print('total hits:', searcher.search(query, 10).totalHits)
print("%s total matching documents" % (len(scoreDocs)))
for scoreDoc in scoreDocs:
    print(scoreDoc)
    fields = reader.getTermVectors(scoreDoc.doc)
    print('fields:', fields.terms('content'))
    fieldsIter = fields.iterator()
    terms = reader.getTermVector(scoreDoc.doc, "content")
    termsIter = terms.iterator()
    print('terms.position:', terms.hasPositions())
However, it is incomplete and I do not know how to complete the code. Any help is appreciated.
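For what it's worth, here is a sketch of one way the term-vector walk could be continued. This is my own guess rather than part of the original post: it reuses reader and scoreDocs from the snippet above and assumes the Lucene 5+ TermsEnum/PostingsEnum API as exposed by PyLucene.

for scoreDoc in scoreDocs:
    terms = reader.getTermVector(scoreDoc.doc, "content")
    termsEnum = terms.iterator()
    bytesRef = termsEnum.next()
    while bytesRef is not None:
        term = bytesRef.utf8ToString()
        postings = termsEnum.postings(None, PostingsEnum.ALL)
        postings.nextDoc()  # a term vector behaves like a single-document index
        for _ in range(postings.freq()):
            position = postings.nextPosition()
            print(term, "position:", position,
                  "offsets:", postings.startOffset(), "-", postings.endOffset())
        bytesRef = termsEnum.next()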

Django Factory Boy object does not exist

I have an issue regarding factory_boy used in my tests. Let's assume I have these three models:
class Company(models.Model):
    name = str

class Domain(models.Model):
    company = ForeignKey(ref=Company)
    name = str
    created_at = datetime

class Record(models.Model):
    domain = ForeignKey(ref=Domain)
    name = str
    created_at = datetime

class CompanyFactory(factory.django.DjangoModelFactory):
    name = str

class DomainFactory(factory.django.DjangoModelFactory):
    company = factory.SubFactory(CompanyFactory)
    name = str
    created_at = datetime

class RecordFactory(factory.django.DjangoModelFactory):
    domain = factory.SubFactory(DomainFactory)
    name = str
    created_at = datetime
Having this, when I'm testing the Record views, at the beginning of every view I check that the Domain object is, in fact, related to the Company object, like so:
try:
    domain = Domain.objects.get(domain=domain_id, company__id=company_id)
except ObjectDoesNotExist:
    return Response(
        data={"message": "Domain isn't related to the company provided."}, status=status.HTTP_403_FORBIDDEN
    )
But this code always raises an ObjectDoesNotExist exception when I run the tests with pytest + factory_boy, whereas manual testing runs fine. Have you experienced something similar? What am I missing here?
Thanks in advance.
As requested by @gbrennon, I'm adding the test code:
Hi! Thanks for answering.
My test code is as follows:
class RecordCompanyAdminTests(CompanyAdminUser):
    def setUp(self):
        super(RecordCompanyAdminTests, self).setUp()
        self.domain = DomainFactory.create()
        self.record = RecordFactory.create()

    def test_record_list_get(self):
        url = reverse("autoprovisioning:record_list", kwargs={"company_id": self.company.id, "domain_id": self.domain.id})
        response = self.client.get(url, format="json")
        self.assertEqual(response.status_code, status.HTTP_200_OK)
How are you doing?
The test code wasn't included, but I'll try to infer things.
"when I do manual testing runs fine"
It seems like you already populated your database! But when you run your test suite, Django uses a separate "test database" for isolation.
What you need to do is create your "data fixtures" using the factory_boy lib.
My suggestion, without any more context:
class MyTestCase(TestCase):
    def setUp(self):
        self.existing_domain_in_database = DomainFactory.create(...)  # insert here the data to populate this model
The desired data should then already exist in your "test database" for every test that runs inside this class.
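Applied to the test above, a minimal sketch (my own, assuming CompanyAdminUser.setUp creates self.company) that ties the record to the same company the URL is built with, so the company__id lookup can match:

class RecordCompanyAdminTests(CompanyAdminUser):
    def setUp(self):
        super().setUp()
        # Override the SubFactories: without this, each factory call creates a
        # brand-new Company, so Domain.objects.get(..., company__id=company_id)
        # never finds a match.
        self.domain = DomainFactory.create(company=self.company)
        self.record = RecordFactory.create(domain=self.domain)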

py2neo v3 AttributeError: object has no attribute 'db_exists'

Trying to import data to a clean neo4j graph database using py2neo version 3. I've defined several node types as below, and everything seemed to be going well – except that I wasn't seeing the nodes show up in my neo4j browser.
Here's the relevant import code; I've verified that the records load properly into Python variables.
for row in data:
ds = DataSource()
# parse Source of Information column as a list, trimming whitespace
ds.uri = list(map(str.strip, row['data_source'].split(',')))
ds.description = row['data_source_description']
graph.merge(ds)
But when I tried to do graph.exists(ds), I got back the following set of errors / tracebacks:
Traceback (most recent call last):
  File "mydir/venv/lib/python3.5/site-packages/py2neo/database/__init__.py", line 1139, in exists
    return subgraph.__db_exists__(self)
AttributeError: 'DataSource' object has no attribute '__db_exists__'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "mydir/venv/lib/python3.5/site-packages/py2neo/database/__init__.py", line 478, in exists
    return self.begin(autocommit=True).exists(subgraph)
  File "mydir/venv/lib/python3.5/site-packages/py2neo/database/__init__.py", line 1141, in exists
    raise TypeError("No method defined to determine the existence of object %r" % subgraph)
TypeError: No method defined to determine the existence of object <DataSource uri=['my_uri']>
Much to my surprise, I can't find another forum post discussing this problem. I'm guessing that there's a problem inheriting from GraphObject, but there doesn't seem to be an explicit definition of a __db_exists__ property for GraphObject, either. In fact, the only place I can find that property mentioned is in the definition of the exists function, when it generates this error.
Can anyone see what I'm doing wrong here?
The node class definitions are as follows:
class Content(GraphObject):  # group Person and Institution
    pass

class Person(Content):
    __primarykey__ = 'name'
    name = Property()
    in_scholar_names = Property()
    #
    mentored = RelatedTo('Person')
    mentored_by = RelatedFrom('Person', 'MENTORED')
    worked_alongside = Related('Person', 'WORKED_ALONGSIDE')
    studied_at = RelatedTo('Institution')
    worked_at = RelatedTo('Institution')
    tagged = RelatedTo('Tag')
    member_of = RelatedTo('Institution')
    last_update = RelatedTo('UpdateLog')

    def __lt__(self, other):
        return self.name.split()[-1] < other.name.split()[-1]

class Institution(Content):
    __primarykey__ = 'name'
    #
    name = Property()
    location = Property()
    type = Property()
    carnegie_class = Property()
    #
    students = RelatedFrom('Person', 'STUDIED_AT')
    employees = RelatedFrom('Person', 'WORKED_AT')
    members = RelatedFrom('Person', 'MEMBER_OF')
    last_update = RelatedTo('UpdateLog')

    def __lt__(self, other):
        return self.name < other.name

class User(GraphObject):
    __primarykey__ = 'username'
    username = Property()
    joined = Property()
    last_access = Property()
    active = Property()
    contributed = RelatedTo('UpdateLog')

class Provenance(GraphObject):  # group UpdateLog and DataSource
    pass

#
class UpdateLog(Provenance):
    __primarykey__ = 'id'
    id = Property()
    timestamp = Property()
    query = Property()
    previous = RelatedTo('UpdateLog', 'LAST_UPDATE')
    next = RelatedFrom('UpdateLog', 'LAST_UPDATE')
    based_on = RelatedTo('Provenance', 'BASED_ON')
    affected_nodes = RelatedFrom('Content', 'LAST_UPDATE')
    contributed_by = RelatedFrom('User', 'CONTRIBUTED')

class DataSource(Provenance):
    __primarykey__ = 'uri'
    id = Property()
    description = Property()
    uri = Property()
    source_for = RelatedFrom('UpdateLog', 'BASED_ON')

class Tag(GraphObject):
    __primarykey__ = 'name'
    name = Property()
    description = Property()
    see_also = Related('Tag')
    tagged = RelatedFrom('Content')
Okay, I think I figured it out. I had been learning py2neo in the context of Flask, where all those class definitions are important and useful for generating views (web pages) of the relationships on a given node.
But for the data import script I'm currently writing, i.e. to actually create the nodes and relationships in the first place, I need to use the vanilla Node and Relationship classes and just specify the types as arguments. This updated version of the original code above produces no errors, and graph.exists(ds) returns True afterward:
for row in data:
    ds = Node("DataSource")
    # parse Source of Information column as a list, trimming whitespace
    ds['uri'] = list(map(str.strip, row['data_source'].split(',')))
    ds['description'] = row['data_source_description']
    graph.merge(ds)
Two other discoveries of note:
My class inheritance was off the mark to begin with, because I should have been working with Node, not GraphObject (even though GraphObject was the correct class to inherit from back in the context of Flask).
For the Node class, I have to use dict-style assignment of properties, with the square brackets and key names as quoted strings; the dot notation was off base here, and I'm surprised I didn't get more errors thrown, and sooner.
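As a side note, here is a sketch (my own, not from the original post) of creating a relationship between two such vanilla nodes with py2neo v3; the labels and property values are purely illustrative.

from py2neo import Node, Relationship

log = Node("UpdateLog", id=1)
source = Node("DataSource", description="census data")
based_on = Relationship(log, "BASED_ON", source)
# graph.create() persists both nodes and the BASED_ON relationship in one call.
graph.create(based_on)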

How to get a list of modified fields with ScriptRunner on IssueUpdated event?

In my Jira instance, I created a script listener for the IssueUpdated event using the ScriptRunner add-on, and I'm trying to get a list of the changed fields. For some reason the getModifiedFields() method comes back empty; can somebody help me fix that?
import com.atlassian.jira.issue.managers.DefaultIssueManager
import com.atlassian.jira.issue.IssueManager
import com.atlassian.jira.issue.fields.CustomField
import com.atlassian.jira.issue.Issue
import com.atlassian.jira.component.ComponentAccessor
import com.atlassian.jira.issue.MutableIssue
import com.atlassian.jira.issue.ModifiedValue
log.setLevel(org.apache.log4j.Level.DEBUG)
Issue mainIssue = event.issue
MutableIssue mutableIssue = (MutableIssue)mainIssue
def modFields = mutableIssue.getModifiedFields()
log.debug("Modified fields count: "+modFields.count) // null
log.debug("Modified fields: "+modFields.toString()) // [:]
log.debug("Original Ticket: "+mainIssue.key) // EPS-39
After turning Google upside down, I found the answer to my question. More background: my final goal is to update a target Jira ticket (in another project) with whatever was changed in the main ticket. I have a custom field that contains the original ticket key, so I can track it down (e.g. ticket "PRJ-1" -> "Original Ticket": "TRG-1"). To do this I go to ScriptRunner -> Script listeners -> custom listener and add an "Issue Updated" listener.
I'm still stuck on the part where I have a custom label field, as you can see from the comments near the end of the code, but at least I managed to get a list of the changed fields in my main ticket.
import com.atlassian.jira.issue.managers.DefaultIssueManager
import com.atlassian.jira.issue.IssueManager
import com.atlassian.jira.issue.fields.CustomField
import com.atlassian.jira.issue.Issue
import com.atlassian.jira.issue.IssueInputParameters
import com.atlassian.jira.component.ComponentAccessor
import com.atlassian.jira.issue.MutableIssue
import com.atlassian.jira.issue.ModifiedValue
import org.ofbiz.core.entity.GenericValue
import com.atlassian.jira.bc.issue.IssueService
import com.atlassian.jira.ComponentManager
import com.atlassian.jira.bc.issue.IssueService
import com.atlassian.jira.bc.issue.DefaultIssueService
log.setLevel(org.apache.log4j.Level.DEBUG)
def customFieldManager = ComponentAccessor.getCustomFieldManager()
Issue mainIssue = event.issue
// Get Custom Field Object "Original Ticket"
def originalTicket = customFieldManager.getCustomFieldObjectByName("Original Ticket")
// Get Value of the Custom Field Object "Original Ticket"
String targetTicketKey = mainIssue.getCustomFieldValue(originalTicket).toString()
// Get Original Ticket Object based on the custom field value
def targetIssue = ComponentAccessor.getIssueManager().getIssueObject(targetTicketKey)
// Get list of modified values in the original ticket to update target ticket
List<GenericValue> changeItemsList = event.getChangeLog().getRelated("ChildChangeItem")
Iterator<GenericValue> changeItemListIterator = changeItemsList.iterator()
Object oldValue
Object newValue
def userManager = ComponentAccessor.getUserManager()
def auser = userManager.getUserByKey("Sync User")
def issueManager = ComponentAccessor.getIssueManager()
CustomField custom
// Loop for all the changed fields
// Loop over all the changed fields
while (changeItemListIterator.hasNext()) {
    GenericValue changeItem = (GenericValue) changeItemListIterator.next()
    String currentFieldName = changeItem.get("field").toString()
    log.debug("Current field: " + currentFieldName)
    oldValue = changeItem.get("oldstring")
    newValue = changeItem.get("newstring")
    if (oldValue != null && newValue != null) {
        log.debug("Field changed from: " + oldValue + " to " + newValue)
        switch (currentFieldName) {
            case "summary":
                log.debug("Found switch: Summary")
                targetIssue.setSummary(newValue.toString())
                break
            case "description":
                log.debug("Found switch: Description")
                targetIssue.setDescription(newValue.toString())
                break
            case "Affected Version(s)":
                log.debug("Found switch: Affected Version(s)")
                // This is a label field. I'm stuck here and I don't know how to manipulate the value.
                // Labels are a set type, studying more about it.
                custom = customFieldManager.getCustomFieldObjectByName("Affected Version(s)")
                targetIssue.setCustomFieldValue(custom, newValue)
                break
            default:
                log.debug("Not found: " + currentFieldName)
        }
    }
    // Update my target issue (in another project) that I'm trying to synchronize with the main issue.
    issueManager.updateIssue(auser, targetIssue, com.atlassian.jira.event.type.EventDispatchOption.DO_NOT_DISPATCH, false)
}
mutableIssue.modifiedFields is only populated in scripted validators, not in listeners.
Iterating through the modified fields in a scripted listener:
List<HashMap<String, Object>> fieldsModified = event.getChangeLog()?.getRelated('ChildChangeItem') as List<HashMap<String, Object>>
for (HashMap<String, Object> field : fieldsModified)
    log.debug("Field: ${field["field"]}, old value: ${field["oldstring"]}, new value: ${field["newstring"]}.")
Affected Version(s) is a system field, like Summary and Description. Use issue.affectedVersions to update it.

Groovy Scripted field to display result of JIRA JQL

I would like some pointers on writing a simple JIRA Groovy scripted field: the input is a JQL query, and the output is the result of that JQL.
For example, if the JQL is "project = RS and fixVersion = 5.0", the field will list the issues returned by this JQL in the custom field display.
First I created a JIRA custom field called "Fixed Issues JQL", in which I enter a value such as "project = VOL and fixVersion = 6.0".
Then I created a second JIRA custom field, a Groovy scripted field called "Fixed Issues List", which contains the following code:
import com.atlassian.crowd.embedded.api.User
import com.atlassian.jira.bc.issue.search.SearchService
import com.atlassian.jira.component.ComponentAccessor
import com.atlassian.jira.issue.Issue
import com.atlassian.jira.issue.IssueManager
import com.atlassian.jira.user.util.UserUtil
import com.atlassian.jira.web.bean.PagerFilter
import com.atlassian.jira.ComponentManager
import com.atlassian.jira.issue.customfields.manager.OptionsManager
SearchService searchService = ComponentAccessor.getComponent(SearchService.class)
UserUtil userUtil = ComponentAccessor.getUserUtil()
User user = ComponentAccessor.getJiraAuthenticationContext().getLoggedInUser()
IssueManager issueManager = ComponentAccessor.getIssueManager()
def componentManager = ComponentManager.instance
def optionsManager = ComponentManager.getComponentInstanceOfType(OptionsManager.class)
def customFieldManager = componentManager.getCustomFieldManager()
def cf = customFieldManager.getCustomFieldObjectByName("Fixed Issues JQL")
def myJQL = issue.getCustomFieldValue(cf) // has a value such as "project = VOL and fixVersion = 6.0"
if (!user) {
    user = userUtil.getUserObject('kwhite')
}
List<Issue> issues = null
SearchService.ParseResult parseResult = searchService.parseQuery(user, myJQL)
if (parseResult.isValid()) {
    def searchResult = searchService.search(user, parseResult.getQuery(), PagerFilter.getUnlimitedFilter())
    // Transform issues from DocumentIssueImpl to the "pure" form IssueImpl (some methods don't work with DocumentIssueImpls)
    issues = searchResult.issues.collect { issueManager.getIssueObject(it.id) }
} else {
    log.error("Invalid JQL: " + myJQL)
}
