Writing to a json file with Polish characters

Writing to a json file with Polish characters - python-3.x

I am using a JSON file to pass data from an LDAP database on a Linux Samba AD DC on for further processing. I fetch the data with a script written in Python 3. My problem is that some fields contain Polish characters that are written as Unicode escape sequences; for example, "Bo\u017Cena \u017Ar\u00F3dlana" should be "Bożena źródlana". I would like the file to contain the already decoded text so that I can read it without guessing which character is behind each escape code.
Where in my code should I put something like a decoder, so that the entire file is saved already decoded and contains the Polish special characters?
my python3 code:
#! /usr/bin/python3
import os
import configparser
import getpass
import sys
import json
import ssl
import shutil
from ldap3 import Server, Connection, Tls, ALL_ATTRIBUTES
from datetime import date
# screen cleaner
os.system('clear')
# timestamp
current_datetime = str(date.today())
# load main config files
main_conf_file = "/tmp/ldap-searchlight/config/searchlight.conf"
config = configparser.RawConfigParser()
config.read(main_conf_file)
# variables
main_path = config['GLOBAL']['main_path']
conf_path = config['GLOBAL']['conf_path']
data_path = config['GLOBAL']['data_path']
arch_patch = config['GLOBAL']['arch_patch']
json_users_file = config['USERS']['json_users_file']
json_cmptrs_file = config['CMPTRS']['json_cmptrs_file']
# ldap variables
ldap_base_dn = config['GLOBAL']["ldap-base-dn"]
ldap_users = config['USERS']['ldap-users']
ldap_cmptrs = config['CMPTRS']['ldap_cmptrs']
user1_name = config['USERS']['user1-name']
user2_name = config['USERS']['user2-name']
user3_name = config['USERS']['user3-name']
user4_name = config['USERS']['user4-name']
user5_name = config['USERS']['user5-name']
# user's choice
print(
"Logujesz się jako:\n" +
" wybierz [ 1 ] dla " + user1_name + "\n" +
" wybierz [ 2 ] dla " + user2_name + "\n" +
" wybierz [ 3 ] dla " + user3_name + "\n" +
" wybierz [ 4 ] dla " + user4_name + "\n" +
" wybierz [ 5 ] dla " + user5_name + "\n"
)
# Ask which of the five configured accounts to bind as.
input_name = input("WYBRANO: ")
if input_name not in ("1", "2", "3", "4", "5"):
    # Anything that is not one of the menu entries is rejected before a
    # password is ever requested.
    print("Permission denied\n")
    sys.exit(1)
# The config keys follow the pattern ldap-user1 .. ldap-user5, so the menu
# number selects the key directly.
user = config["USERS"]["ldap-user" + input_name]
password = getpass.getpass()
LDAP_HOST = config['GLOBAL']['ldap-host']
LDAP_USER = user +","+ ldap_users +","+ ldap_base_dn
LDAP_PASSWORD = password
tls_configuration = Tls(validate=ssl.CERT_NONE, version=ssl.PROTOCOL_TLSv1)
def ldap_server():
    """Return an ldap3 ``Server`` for LDAP_HOST using SSL and ``tls_configuration``."""
    # NOTE(review): get_info receives ALL_ATTRIBUTES here; confirm against the
    # ldap3 documentation that this is the intended value for that parameter.
    return Server(LDAP_HOST, use_ssl=True, tls=tls_configuration, get_info=ALL_ATTRIBUTES)
def ldap_connection():
    """Return an ldap3 ``Connection`` bound as LDAP_USER with LDAP_PASSWORD.

    ``auto_bind=True`` is passed, so the bind is requested as part of
    constructing the connection.
    """
    # No trailing comma here: "ldap_server()," would wrap the server in a
    # one-element tuple instead of passing the Server object itself.
    server = ldap_server()
    return Connection(server, user=LDAP_USER,
                      password=LDAP_PASSWORD,
                      auto_bind=True)
# ldap users
LDAP_BASE_DN = ldap_users +","+ ldap_base_dn
LDAP_OBJECT_FILTER = '(objectclass=user)'
user_attr_list=[ \
'cn', \
'sn', \
'givenName', \
'instanceType', \
'whenCreated', \
'displayName', \
'uSNCreated', \
'name', \
'objectGUID', \
'badPwdCount', \
'codePage', \
'countryCode', \
'badPasswordTime', \
'lastLogoff', \
'lastLogon',\
'primaryGroupID', \
'objectSid', \
'accountExpires', \
'logonCount', \
'sAMAccountName', \
'sAMAccountType', \
'userPrincipalName', \
'objectCategory', \
'pwdLastSet', \
'userAccountControl', \
'lastLogonTimestamp', \
'whenChanged', \
'uSNChanged', \
'memberOf', \
'distinguishedName' ]
conn = ldap_connection()
conn.search(LDAP_BASE_DN, LDAP_OBJECT_FILTER, attributes=user_attr_list)
# output to json
# The target path is assembled from the main_path / data_path / json_users_file config values.
json_users_data = main_path + data_path + json_users_file
# The search result is serialized to JSON text and parsed back into Python objects.
data = json.loads(conn.response_to_json())
# No encoding is passed to open(), so the platform default text encoding is used.
with open(json_users_data, 'w') as jsonfile:
# json.dump() defaults to ensure_ascii=True, so every non-ASCII character
# is written to the file as a \uXXXX escape sequence.
json.dump(data, jsonfile)
# copy data to archive
json_users_arch = main_path + arch_patch + current_datetime + "_" + json_users_file
shutil.copy2(json_users_data, json_users_arch)
# ldap computers
LDAP_BASE_DN = ldap_cmptrs +","+ ldap_base_dn
LDAP_OBJECT_FILTER = '(objectclass=computer)'
cmptr_attr_list=[ \
'cn', \
'instanceType', \
'whenCreated', \
'uSNCreated', \
'name', \
'objectGUID', \
'badPwdCount', \
'codePage', \
'countryCode', \
'badPasswordTime', \
'lastLogoff', \
'lastLogon',\
'primaryGroupID', \
'accountExpires', \
'logonCount', \
'sAMAccountName', \
'sAMAccountType', \
'objectCategory', \
'pwdLastSet', \
'userAccountControl', \
'lastLogonTimestamp', \
'whenChanged', \
'uSNChanged', \
'dNSHostName', \
'isCriticalSystemObject', \
'msDS-SupportedEncryptionTypes', \
'operatingSystem', \
'operatingSystemVersion', \
'servicePrincipalName', \
'distinguishedName' ]
conn = ldap_connection()
conn.search(LDAP_BASE_DN, LDAP_OBJECT_FILTER, attributes=cmptr_attr_list)
# output to json
# The target path is assembled from the main_path / data_path / json_cmptrs_file config values.
json_cmptrs_data = main_path + data_path + json_cmptrs_file
# The search result is serialized to JSON text and parsed back into Python objects.
data = json.loads(conn.response_to_json())
# No encoding is passed to open(), so the platform default text encoding is used.
with open(json_cmptrs_data, 'w') as jsonfile:
# json.dump() defaults to ensure_ascii=True, so every non-ASCII character
# is written to the file as a \uXXXX escape sequence.
json.dump(data, jsonfile)
# copy data
json_cmptrs_arch = main_path + arch_patch + current_datetime + "_" + json_cmptrs_file
shutil.copy2(json_cmptrs_data, json_cmptrs_arch)
print("USERS:")
print("Data file created at: " + json_users_data)
print("Archive file created at: " + json_users_arch)
print("------------------------------------------------------------------------------")
print("COMPUTERS")
print("Data file created at: " + json_cmptrs_data)
print("Archive file created at: " + json_cmptrs_arch)
sys.exit(0)
# exit(0) -> OK
# exit(1) -> FAULT
my jsons output looks:
{"entries": [
{"attributes":
{
"accountExpires": ["9223372036854775807"],
"badPasswordTime": [],
"badPwdCount": [],
"cn": ["Bo\u017Cena \u017Ar\u00F3dlana"],
"codePage": ["0"],
"countryCode": ["0"],
"displayName": ["Bo\u017Cena \u017Ar\u00F3dlana"],
"distinguishedName": ["CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"],
"givenName": ["Bo\u017Cena"],
"instanceType": ["4"],
"lastLogoff": [],
"lastLogon": [],
"lastLogonTimestamp": ["132978476924537530"],
"logonCount": [],
"memberOf": [],
"name": ["Bo\u017Cena \u017Ar\u00F3dlana"],
"objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"],
"objectGUID": [
{
"encoded": "AFvzBO0T+Ey9TL3RHGtghQ==",
"encoding": "base64"
}
],
"objectSid": [
{
"encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDIE8AAA==",
"encoding": "base64"
}
],
"primaryGroupID": ["513"],
"pwdLastSet": ["132979783101549910"],
"sAMAccountName": ["pjarmolowicz"],
"sAMAccountType": ["805306368"],
"sn": ["\u017Ar\u00F3dlana"],
"uSNChanged": ["4986"],
"uSNCreated": ["4986"],
"userAccountControl": ["512"],
"userPrincipalName": ["bzrodlana#universum.local"],
"whenChanged": ["20220525185150.0Z"],
"whenCreated": ["20211125124337.0Z"]},
"dn": "CN=Bo\u017Cena \u017Ar\u00F3dlana,OU=FE,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"
},
{"attributes": {
"accountExpires": ["9223372036854775807"],
"badPasswordTime": ["133128872888506790"],
"badPwdCount": ["0"],
"cn": ["Jan Kowalski"],
"codePage": ["0"],
"countryCode": ["0"],
"displayName": ["Jan Kowalski"],
"distinguishedName": ["CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"],
"givenName": ["Jan"],
"instanceType": ["4"],
"lastLogoff": [],
"lastLogon": ["133129921828641420"],
"lastLogonTimestamp": ["133125345565644950"],
"logonCount": ["55"],
"memberOf": [],
"name": ["Jan Kowalski"],
"objectCategory": ["CN=Person,CN=Schema,CN=Configuration,DC=universum,DC=local"],
"objectGUID": [
{
"encoded": "AScnTASpKUun4oadMC5Qxg==",
"encoding": "base64"
}
],
"objectSid": [
{
"encoded": "AQUAAAAAAAUVAAAA6TO9FZD9W8QoWlFDngQAAA==",
"encoding": "base64"
}
],
"primaryGroupID": ["513"],
"pwdLastSet": ["131577266641617910"],
"sAMAccountName": ["jkowalski"],
"sAMAccountType": ["805306368"],
"sn": ["Kowalski"],
"uSNChanged": ["149609"],
"uSNCreated": ["5397"],
"userAccountControl": ["512"],
"userPrincipalName": ["jkowalski#universum.local"],
"whenChanged": ["20221110061556.0Z"],
"whenCreated": ["20130610115016.0Z"]},
"dn": "CN=Jan Kowalski,OU=RR-32,OU=RR,OU=Users,OU=UNIVERSUM,DC=universum,DC=local"
}
]
}

Use the following to suppress Unicode escape codes and write the data UTF-8-encoded to support non-ASCII characters.
# encoding='utf8' makes the file UTF-8 regardless of the platform default;
# ensure_ascii=False writes non-ASCII characters as themselves instead of
# \uXXXX escapes.
with open(json_cmptrs_data, 'w', encoding='utf8') as jsonfile:
    json.dump(data, jsonfile, ensure_ascii=False)
Working example:
import json

# The \uXXXX sequences below are ordinary Python string escapes: the str
# already holds the real characters once the literal is evaluated.
data = {"cn": ["Bo\u017Cena \u017Ar\u00F3dlana"]}

# encoding='utf8' fixes the file encoding; ensure_ascii=False stops json from
# turning the non-ASCII characters back into \uXXXX escapes on output.
with open('output.json', 'w', encoding='utf8') as file:
    json.dump(data, file, ensure_ascii=False)
output.json (UTF-8-encoded):
{"cn": ["Bożena źródlana"]}

Related

how to enable emmet in vim for ejs file

I wanna use emmet-vim on ejs files, my .vimrc config is
" Global-install flag set to 0 (presumably: do not enable emmet in every
" buffer; confirm against the emmet-vim documentation).
let g:user_emmet_install_global = 0
" Run :EmmetInstall whenever a buffer's filetype is html, css or ejs.
autocmd FileType html,css,ejs EmmetInstall
" redefine trigger key
let g:user_emmet_leader_key=','
" Per-filetype settings: php, xml, haml and ejs each extend the html
" settings; php additionally sets its 'filters' entry to 'c'.
let g:user_emmet_settings = {
\ 'php' : {
\ 'extends' : 'html',
\ 'filters' : 'c',
\ },
\ 'xml' : {
\ 'extends' : 'html',
\ },
\ 'haml' : {
\ 'extends' : 'html',
\ },
\ 'ejs' : {
\ 'extends' : 'html',
\ }}
Yet it doesn't work. Can anyone help?
P.S. my emmet-vim functions normally on html and css files

Maybe you can try it in this way with web-api
let g:user_emmet_settings = webapi#json#decode(join(readfile(expand('~/.snippets_custom.json')), "\n"))
Reference: emmet-vim

Convert a key-value representing string to a list

I have a string say something like
'Content-Type: application/json' \
'Postman-Token: a47537e5-b4b0-4915-93c8-92acf4b21e70' \
'cache-control: no-cache' \
I want it in a format like
['Content-Type' : 'application/json','Postman-Token' : 'a47537e5-b4b0-4915-93c8-92acf4b21e70','cache-control' : 'no-cache']

Here is a code for you:
// The header lines flattened onto one line: "Name: value Name: value ...".
def headers = 'Content-Type: application/json Postman-Token: a47537e5-b4b0-4915-93c8-92acf4b21e70 cache-control: no-cache'

// Removing every colon leaves alternating name/value tokens separated by
// single spaces. NOTE: this only works while neither the names nor the
// values contain a space or a colon themselves.
def tokens = headers.replaceAll(":", "").split(" ")

// Walk the tokens two at a time; the closure parameter is the current index,
// so no separate counter is needed. map2 keeps insertion order.
def map2 = [ : ]
0.step(tokens.length, 2) { i ->
    map2.put(tokens[i], tokens[i + 1])
}

// Fully qualified so the snippet runs without a separate import line.
println groovy.json.JsonOutput.toJson(map2)
The result:
{"Content-Type":"application/json","Postman-Token":"a47537e5-b4b0-4915-93c8-92acf4b21e70","cache-control":"no-cache"}

How to programmatically enable the issue tracker of a given repository I own through the GitHub API?

I have a bunch of repository forks, and I would like to enable all their issue trackers. I am not sure why, but GitHub comes with them disabled by default, and I forgot to enable them when forking.
Enabling their issue trackers one by one would now be too much work, so I thought I could write a program to do this. For now, I have managed to get a list of all the repositories I own with the following code:
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import os
import shlex
import json
import subprocess
# Directory containing this script; it is passed to run_command() as the
# working directory for every curl call.
current_directory = os.path.dirname( os.path.realpath(__file__) )
# Use % so the path is substituted into the message instead of being printed
# as a second, separate argument after a literal "%s".
print( 'directory walk %s' % current_directory )
# Complete header line handed to curl's -H option.
token = "Authorization: token mynicetoken102312312541230240021470300250230"
user_name = "myusername"
def run_command(absolute_path, command_name):
    """Run a command line and return its captured standard output as bytes.

    absolute_path -- working directory the command is started in.
    command_name  -- the command as one string; it is split into an argument
                     list with shlex.split(), so no shell is involved.

    The argument list and the UTF-8-decoded output are also printed.
    """
    command = shlex.split( command_name )
    print( 'command: %s' % command )
    command_line_interface = subprocess.Popen( command, stdout=subprocess.PIPE, cwd=absolute_path )
    # communicate() returns (stdout, stderr); only stdout is piped here.
    output = command_line_interface.communicate()[0]
    print( "\n%s" % output.decode('utf-8') )
    return output
def main():
    """List the user's repositories through the GitHub API and visit each one."""
    result = run_command( current_directory, "curl -H '%s' https://api.github.com/users/%s/repos" % ( token, user_name ) )
    result_json = json.loads( result.decode('utf-8') )
    for repository_data in result_json:
        repository_full_name = repository_data['full_name']
        print( "Processing {:s}".format( repository_full_name ) )
        # Now, what do?
        run_command( current_directory, "curl -H '%s' https://api.github.com/%s/misterX" % ( token, repository_full_name ) )

if __name__ == "__main__": main()
I think the only thing missing is to complete the last line:
# Now, what do?
run_command( current_directory, "curl -H '%s' https://api.github.com/%s/misterX" % ( token, repository_full_name ) )
After finding How do I rename a GitHub repository via their API?, I managed to build the following code:
# Now, what do?
full_command = \
r"""
curl
-H "Authorization: Token %s"
-H "Content-Type: application/json"
-H "Accept: application/json"
-X PATCH
--data '{ "has_issues": true }'
https://api.github.com/repos/:%s
""" % ( token, repository_full_name )
print( 'full_command: %s' % full_command )
run_command( current_directory, full_command )
But GitHub says:
{
"message": "Not Found",
"documentation_url": "https://developer.github.com/v3/repos/#edit"
}
Their API page does not help much: https://developer.github.com/v3/repos/#edit
References:
How to retrieve the list of all github repositories of a person?
https://github.com/settings/tokens GitHub token with full repository access

The answer I used on How do I rename a GitHub repository via their API? was wrong. It was using https://api.github.com/repos/:owner/repo, but it should be https://api.github.com/repos/owner/repo. After fixing that, GitHub kept saying:
{
"message": "Validation Failed",
"errors": [
{
"resource": "Repository",
"code": "custom",
"field": "name",
"message": "name is too short (minimum is 1 character)"
}
],
"documentation_url": "https://developer.github.com/v3/repos/#edit"
}
Then, I added "name": "repository_name" to the JSON, and it worked. This is the new code:
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import os
import shlex
import json
import subprocess
import shutil
"""
Iterates through all repositories from a user and enable the issue tracker.
"""
# GitHub token with full repository access
# https://github.com/settings/tokens
token = "8217398127859182039802175098213389019766"
user_name = "username"
current_directory = os.path.dirname( os.path.realpath(__file__) )
print( 'directory walk %s' % current_directory )
# The maximum count of repositories to to process when calling this batch script.
maximum_process_limit = 1000
def run_command(absolute_path, command_name):
    """Run a command line and return its captured standard output as bytes.

    absolute_path -- working directory the command is started in.
    command_name  -- the command as one string; it is split into an argument
                     list with shlex.split(), so no shell is involved.
    """
    command = shlex.split( command_name )
    print( 'command: %s' % command )
    command_line_interface = subprocess.Popen(
            command, stdout=subprocess.PIPE, cwd=absolute_path )
    # communicate() returns (stdout, stderr); only stdout is piped here.
    output = command_line_interface.communicate()[0]
    # print( "%s" % output )
    # print( "\n%s" % output.decode('utf-8') )
    return output
def main():
    """Process result pages one after another until a page asks to stop."""
    page_index = 1
    # process_repositories_page() returns a truthy value only while another
    # page may follow.
    while process_repositories_page( page_index ):
        page_index += 1
def process_repositories_page(page_index):
    """Enable the issue tracker on every repository of one result page.

    page_index -- number of the result page to request from the GitHub API.

    Returns True when the page was completely filled (so another page may
    follow) and False when the page was short, the reply was not a list, or
    maximum_process_limit has been used up.
    """
    global maximum_process_limit
    items_per_page = 100

    # The token is sent as a proper "Authorization" header, the same way the
    # PATCH request below sends it; a bare token is not a valid header line.
    repositories_text = run_command( current_directory,
            "curl -H 'Authorization: token %s' https://api.github.com/users/%s/repos?per_page=%s&page=%s" % (
            token, user_name, items_per_page, page_index ) )

    repositories_json = json.loads( repositories_text.decode('utf-8') )

    # Anything other than a list of repositories (for example an error
    # object) cannot be iterated as repositories below.
    if not isinstance( repositories_json, list ):
        print( 'Unexpected response: %s' % repositories_json )
        return False

    for repository_data in repositories_json:
        print( "Processing repository: %s" % repository_data['full_name'] )

        if maximum_process_limit <= 0: return False
        maximum_process_limit -= 1

        # shlex.quote() makes the JSON payload survive the shlex.split() in
        # run_command() as a single argument, whatever characters it holds.
        full_command = \
        r"""
            curl
                -H "Authorization: Token {token}"
                -H "Content-Type: application/json"
                -H "Accept: application/json"
                -X PATCH
                --data {data}
                https://api.github.com/repos/{full_name}
        """.format(
                token=token,
                data=shlex.quote( json.dumps(
                    {
                        # The name is sent along with has_issues; the
                        # request was rejected without it.
                        "name": repository_data['name'],
                        "has_issues": True
                    }
                ) ),
                full_name=repository_data['full_name']
            )
        print( 'full_command: %s' % full_command )

        result = run_command( current_directory, full_command )
        print( 'result: %s' % result.decode('utf-8') )

    # A completely filled page means there may be more repositories to fetch.
    return len( repositories_json ) == items_per_page
# Run the batch only when the file is executed as a script.
if __name__ == "__main__":
    main()
New references:
Programmatically enable Github Pages for a repository
Escape double quotes for JSON in Python
Github API v3 doesn't show all user repositories

how to configuration vim-easytags for javascript

I want to use vim-easytags for JavaScript, so that it can use jsctags to generate tags each time I save my code. From the documentation of vim-easytags, I noticed that it supports JavaScript tags and jsctags, but I am struggling to get the configuration right. Can anyone help me fix my .vimrc?
let g:easytags_python_enabled=1
let g:easytags_events = ['BufWritePost']
let b:easytags_auto_highlight = 1
let g:easytags_async=1
let g:easytags_by_filetype=1
let g:easytags_languages = {
\ 'javascript': {
\ 'cmd': 'jsctags',
\ 'args': ['-f'],
\ 'fileoutput_opt': '-f',
\ 'stdout_opt': '-f-',
\ 'recurse_flag': '-R'
\ }
\}

This seems to do it for me:
let g:easytags_languages = {
\ 'javascript': {
\ 'cmd': 'jsctags',
\ 'recurse_flag': ''
\ }
\}

Displaying ► character in Vim terminal lightline status bar

I am working on SUSE Linux Enterprise Desktop 11 (x86_64) and I am using Vim in terminal as my editor. I have recently installed a plugin called lightline from https://github.com/itchyny/lightline.vim. The plugin uses special characters to make the status line look like this:
The > part of the bar is actually ► character coloured like the square next to it. The problem is that the bar, in my case, looks like this:
The ► character is not displayed properly, although the encoding is set to UTF-8 and all the required fonts are installed on the system (fonts for powerline). In this case the font set on terminal is Liberation Mono for Powerline.
Lightline settings in my vimrc:
set encoding=utf-8
scriptencoding utf-8
let g:lightline = {
\ 'colorscheme': 'wombat',
\ 'separator': {'left': "\u25B6", 'right': ''},
\ 'subseparator': { 'left': '', 'right': ''}
\ }
I also tried copying the ► character like this
let g:lightline = {
\ 'colorscheme': 'wombat',
\ 'separator': {'left': "►", 'right': ''},
\ 'subseparator': { 'left': '', 'right': ''}
\ }
But it manifests in the same way.
Furthermore, there is a problem with ^ characters wherever there is supposed to be whitespace.
Is there any solution for this?

Following is my my_configs.vim for lightline, it works perfectly in my Fedora 26 system.
" lightline configuration: wombat colorscheme, custom components and
" glyph separators given as \uXXXX escapes.
" A shorter first assignment of g:lightline was removed: it only set the
" colorscheme and was overwritten immediately by this one.
let g:lightline = {
      \ 'colorscheme': 'wombat',
      \ 'active': {
      \   'left': [ ['mode', 'paste'],
      \             ['fugitive', 'readonly', 'filename', 'modified'] ],
      \   'right': [ [ 'lineinfo' ], ['percent'] ]
      \ },
      \ 'component': {
      \   'readonly': '%{&filetype=="help"?"":&readonly?"\ue0a2":""}',
      \   'modified': '%{&filetype=="help"?"":&modified?"\ue0a0":&modifiable?"":"-"}',
      \   'fugitive': '%{exists("*fugitive#head")?fugitive#head():""}'
      \ },
      \ 'component_visible_condition': {
      \   'readonly': '(&filetype!="help"&& &readonly)',
      \   'modified': '(&filetype!="help"&&(&modified||!&modifiable))',
      \   'fugitive': '(exists("*fugitive#head") && ""!=fugitive#head())'
      \ },
      \ 'separator': { 'left': "\ue0b0", 'right': "\ue0b2" },
      \ 'subseparator': { 'left': "\ue0b1", 'right': "\ue0b3" }
      \ } " this closing line was missing from an earlier revision of the post
Sorry for my mistake, I just fixed this config.
If you installed hack font from https://github.com/chrissimpkins/Hack/releases
and install powerline-fonts by command "sudo dnf install powerline-fonts" in Fedora 26 system, you probably want to add the following configs to your
/etc/fonts/local.conf
<?xml version="1.0"?>
<!DOCTYPE fontconfig SYSTEM "fonts.dtd">
<fontconfig>
<alias>
<family>Hack</family>
<prefer>
<family>PowerlineSymbols</family>
</prefer>
</alias>
</fontconfig>

The problem was explained in this thread stackoverflow.com/questions/7223309/. It says that if the stl and stlnc have the same values, they will be replaced with ^^^. It works when you put * for stlnc and whitespace for stl.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Writing to a json file with Polish characters - python-3.x

Related

how to enable emmet in vim for ejs file

Convert a key-value representing string to a list

How to programmatically enable the issue tracker of a given repository I own through the GitHub API?

how to configuration vim-easytags for javascript

Displaying ► character in Vim terminal lightline status bar

Categories

Resources