Scraping multiple links with bs4 and request - python-3.x

I'm new to this, so please go easy on me. I want to scrape the name and phone number from multiple pages, but my code only scrapes the first URL. It also scrapes only one item, either the name or the phone number. The name is in an "h2" tag with class "c411ListedName" and the number is in a 'span' tag with class "c411Phone".
import csv
import requests
from bs4 import BeautifulSoup
urls = ['https://www.canada411.ca/search/si/1/kumar/Canada/?pgLen=100','https://www.canada411.ca/search/si/2/kumar/Canada/?pgLen=100']
for url in urls:
response = requests.get(url)
html = requests.get(url).text
soup = BeautifulSoup(response.content, "html.parser")
products = soup.findAll('span','h2', class_=['c411Phone', 'c411ListedName'])
for div in products:
print(div.text)

What happens
You are requesting both URLs but processing only the last one, because your second loop is not executed until your first loop has finished.
Solution
Fix your indentation and put the second loop inside the first:
import csv
import requests
from bs4 import BeautifulSoup
# Result pages to scrape (100 listings per page).
urls = [
    'https://www.canada411.ca/search/si/1/kumar/Canada/?pgLen=100',
    'https://www.canada411.ca/search/si/2/kumar/Canada/?pgLen=100',
]
for url in urls:
    # One request per page is enough; fetching the same URL twice only
    # doubles the traffic.
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")
    # The tag names must be passed together as a list. The second positional
    # argument of find_all() is `attrs`, not another tag name, so
    # find_all('span', 'h2', ...) never matches the <h2> name elements.
    products = soup.find_all(['span', 'h2'],
                             class_=['c411Phone', 'c411ListedName'])
    # This loop has to run inside the page loop; placed after it, only the
    # results of the last page would be printed.
    for tag in products:
        print(url, tag.text)
Additional solution
Select and process the enclosing <div>
import csv
import requests
from bs4 import BeautifulSoup
# Result pages to scrape (100 listings per page).
urls = [
    'https://www.canada411.ca/search/si/1/kumar/Canada/?pgLen=100',
    'https://www.canada411.ca/search/si/2/kumar/Canada/?pgLen=100',
]
# Collected records across all pages, one {'name', 'number'} dict per row.
data = []
for url in urls:
    # Fetch each page once (the duplicate request and the unused `html`
    # variable were dropped).
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")
    # Work on the enclosing row element so that a name and its number are
    # always taken from the same listing.
    for row in soup.select('div.c411Listing.jsResultsList > div.listing__row:nth-of-type(1)'):
        data.append({
            'name': row.h2.a.text,
            'number': row.span.text,
        })
# Bare expression: displays the result when run in a notebook / REPL.
data

You should do two things:
Define the product lists outside of your for loop so that they can store the content of all URLs
Call findAll separately for the h2 and the span elements
Here is the code:
import requests
from bs4 import BeautifulSoup
# Result pages to scrape (100 listings per page).
urls = ['https://www.canada411.ca/search/si/1/kumar/Canada/?pgLen=100',
        'https://www.canada411.ca/search/si/2/kumar/Canada/?pgLen=100']
# One entry per page: the <h2> name tags and the <span> phone tags.
# (The two lists previously held each other's content.)
products_h = []
products_span = []
for url in urls:
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")
    products_h.append(soup.find_all('h2', class_=['c411ListedName']))
    products_span.append(soup.find_all('span', class_=['c411Phone']))
# Walk the pages in lockstep, then pair every name with the phone number at
# the same position on that page.
# NOTE(review): the pairing is positional; it assumes every listing has both
# a name and a phone element -- confirm against the page markup.
for page_names, page_phones in zip(products_h, products_span):
    for h, span in zip(page_names, page_phones):
        print(f'{h.text},{span.text}')
Here is the result that I got:
B Kumar,(416) 444-5781
V Roy Kumar,(613) 226-2668
C Kumar,(514) 388-4472
Ananth Kumar,(905) 389-2429
ASHOK KUMAR,(905) 575-7795
R Kumar,(905) 820-2447
A Kumar,(613) 736-1882
S R Kumar,(705) 426-9866
R Kumar,(519) 886-9588
S & H Kumar,(905) 303-6949
V KUMAR,(613) 834-1453
Ashwani Kumar,(867) 633-5854
S Kumar,(514) 620-1780
V Kumar,(514) 421-8659
B C M Kumar,(905) 335-4480
Vasant Kumar,(613) 723-3485
MICHAEL KUMAR,(519) 824-8555
M Kumar,(613) 747-3790
G Kumar,(905) 265-2017
T Kumar,(905) 824-8625
A Kumar,(905) 508-1561
U Kumar,(905) 433-2165
J Kumar,(905) 426-5249
Priya Kumar,(905) 827-0090
C Kumar,(905) 216-4078
M Kumar,(905) 456-6423
R Kumar,(647) 827-5029
N Kumar,(647) 350-2730
Sanjai Kumar,(416) 913-3648
R Kumar,(905) 916-2177
S Kumar,(289) 232-5477
J Kumar,(416) 238-9504
P Kumar,(905) 956-1699
R Kumar,(905) 793-9551
S Kumar,(905) 848-8282
Naresh Kumar,(905) 257-6640
K Kumar-Telang,(905) 450-1745
Poonam Kumar,(905) 937-6120
R Kumar,(416) 741-1314
A Kumar,(905) 454-4490
A Kumar,(905) 799-8742
Subodh Kumar,(416) 239-4590
R Kumar,(905) 888-0007
S Kumar,(416) 421-2311
R Kumar,(905) 654-8063
N Kumar,(905) 295-4343
R Kumar,(905) 303-4532
S Kumar,(519) 653-0474
Aneal Kumar,(905) 792-1051
Unesh Kumar,(905) 451-3614
Virender Kumar,(416) 483-9116
R Kumar,(905) 796-3413
M Kumar,(519) 691-1176
S SAMPATH-kUMAR,(416) 745-7911
S Kumar,(416) 724-5511
R KUMAR,(416) 256-7991
C Kumar,(905) 509-0211
A Kumar,(905) 455-2071
Ram Kumar,(905) 829-4686
D Kumar,(905) 608-1849
R Kumar,(416) 283-9395
M Kumar,(905) 456-0497
P Kumar,(416) 284-5028
M Kumar,(705) 724-3507
Pradeep Kumar,(613) 548-4619
R Kumar,(416) 481-6271
V Kumar,(905) 451-0797
Rakesh Kumar,(416) 626-8349
J Kumar,(416) 604-4806
A Kumar,(416) 759-7526
A Kumar,(905) 460-9268
Parapurath S Kumar,(905) 794-3673
N Kumar,(905) 642-9708
Praveen Kumar,(647) 827-1562
V Kumar,(905) 488-7531
A Kumar,(905) 901-0050
R Kumar,(905) 216-7945
Praveen Kumar,(416) 238-1247
J Kumar,(905) 265-7537
Praveen Kumar,(416) 855-3744
S Kumar,(905) 792-7671
Ragesh Kumar,(647) 727-5826
Goutham Kumar,(905) 670-9268
V Kumar,(519) 472-2394
Dharmana Kumar,(905) 908-2216
K Kumar,(905) 654-7177
J Kumar,(905) 554-2210
Praveen Kumar,(416) 855-3091
D Kumar,(416) 901-7977
Parmod Kumar,(905) 456-2286
S Kumar,(416) 674-5392
Suby Kumar,(905) 831-8886
A Kumar,(416) 282-0182
A Kumar,(905) 915-7124
Surinder Kumar,(905) 264-7743
Rajesh Kumar,(416) 855-1081
Selva Kumar,(905) 769-4456
Santhosh Kumar,(905) 769-4503
Praveen Kumar,(416) 855-1880
K Kumar,(416) 467-6727
J Kumar,(905) 453-9596
C Kumar,(905) 683-5983
k kumar,(416) 746-2705
A Kumar,(416) 667-9363
Praveen Kumar,(416) 855-0367
J Kumar,(905) 666-4988
Ashok N Kumar,(905) 877-2302
V Chandra Kumar,(416) 724-0156
D Kumar,(416) 332-8825
S Kumar,(905) 476-1995
Maria Kumar,(905) 499-4918
S KUMAR,(905) 814-0518
P Kumar,(905) 522-1386
SURESH KUMAR,(514) 693-0564
S Kumar,(514) 932-1504
Santosh Kumar,(416) 855-3747
p kumar,(905) 604-0670
V R Kumar,(416) 766-3335
Satish Kumar,(416) 406-2340
J Kumar,(905) 854-2337
Sam Kumar,(905) 854-2342
R Kumar,(416) 593-4306
M Kumar,(905) 338-7543
Nita Kumar,(905) 653-0474
Rajender Kumar,(705) 522-2213
R Kumar,(416) 438-3855
V KUMAR,(705) 522-6114
Raman Kumar,(905) 886-0558
B Kumar,(416) 445-4501
S Kumar,(416) 246-9104
S Kumar,(905) 792-7124
S Kumar,(905) 495-3315
D Kumar,(416) 742-8025
R Kumar,(905) 824-8170
Pj Kumar,(905) 785-3110
Rai Bal Kumar,(418) 522-8497
S Kumar,(514) 620-0626
S Kumar,(705) 352-0418
V KUMAR,(647) 345-3130
R Kumar,(416) 208-7965
R Kumar,(226) 663-3309
R Kumar,(905) 553-6099
K Kumar,(905) 566-5855
P Kumar Rudra,(905) 565-6553
Rashmi Kumar,(905) 363-7578
Anil Kumar,(905) 497-6514
Shiv Kumar,(416) 265-5763
Aprul Kumar,(416) 240-9379
A Kumar,(289) 752-7462
P Kumar,(416) 744-3493
T Melburn-Kumar,(905) 524-5529
V Kumar,(416) 724-7624
Poonam Kumar,(905) 430-1763
C Kumar,(905) 470-1296
V Kumar,(905) 237-8979
M KUMAR,(905) 458-1234
R Kumar,(416) 335-0731
A Kumar,(416) 213-9003
M Kumar,(905) 564-5779
S Kumar,(905) 890-3061
P & R Kumar,(905) 726-4444
L Kumar,(416) 438-3283
R Kumar,(613) 841-4638
R KUMAR,(416) 490-9860
K Kumar,(226) 647-1075
J Kumar,(905) 654-1532
S Kumar,(905) 472-4343
D Kumar,(905) 956-7503
Jagjit Kumar,(905) 915-8826
Vimala Krishna Kumar,(416) 551-3021
R Kumar,(647) 340-0236
A Kumar,(905) 791-8114
Santosh Kumar,(416) 431-2661
Vijay Kumar,(905) 487-9475
Dr Vinod Dr Uma Kumar,(613) 737-4113
S Kumar,(905) 281-8837
L KUMAR,(905) 860-2970
Mifan Kumar,(905) 201-0539
S Kumar,(416) 746-5671
Padin Kumar,(905) 881-3870
Shiue Kumar,(905) 499-0950
M Kumar,(905) 216-8303
P Kumar,(289) 752-6480
A Kumar,(613) 837-3511
R Kumar,(905) 846-7810
R Kumar,(416) 286-5743
E Kumar,(613) 440-2647
S Kumar,(905) 840-1426
V Kumar,(819) 681-6376
M Kumar,(416) 287-1812
D Kumar,(705) 352-2627
S Kumar,(905) 455-2242
Raj Kumar,(438) 288-3934
A Kumar,(289) 752-2619
S Kumar,(416) 284-0542
S Kumar,(705) 352-5001
V Kumar,(905) 820-6636
P KUMAR,(514) 342-4183
A Kumar,(519) 256-2012
P Kumar,(416) 412-7972
Hope it was helpful.

Related

Is it possible to intercept the encrypted HTTP body in Biztalk AS2 connection?

Is it possible to see the full HTTP request that was made towards an AS2 endpoint?
I tried Failed requests tracing from IIS but it does not show the body (probably because it is encrypted).
I would like to be able to replay the AS2 call using something like this:
POST http://aaa.com/PARTY/BTSHTTPReceive.dll HTTP/1.1
Content-Type: application/pkcs7-mime; smime-type=enveloped-data; name="smime.p7m"
Disposition-Notification-Options: signed-receipt-protocol=required,pkcs7-signature; signed-receipt-
micalg=required,sha1
AS2-Version: 1.2
Content-Transfer-Encoding: binary
Mime-Version: 1.0
Receipt-Delivery-Option: http://IPADDRESS/BiztalkMDN/BTSHTTPReceive.dll
EDIINT-Features: multiple-attachments
AS2-To: SOURCE
Message-ID: <NC-AZ-HQ-BZ-ACC_19ED90F5-1E8F-455F-A800-22917F2C5DF0>
AS2-From: DESTINATION
Disposition-Notification-To: http://IPADRESS/BiztalkMDN/BTSHTTPReceive.dll
User-Agent: Microsoft (R) BizTalk (R) Server
Host: ...
Content-Length: 2509
Expect: 100-continue
Connection: Close
0 * H
0 1 Q0 M 050!10Uaaa.com- JΎKJ 7J- *0
* H
f m zG$ jF9 Q1 : d, = T ׻r E 5 C #/ qx M kR [ mV / NǦ ÿ
a. =]: {yB_sgV [ `o zڛN L 1 ߭ R q jUۮy '/ p 7t :ƒ* l/Q " O QCF \ =cA# j (mV m x 3 = 'P & m(RR҉h0 K ) + #F pˮ PU0 Z * H
0* H
0
: 5& c.퀂0—D w | Θ y cO2 M z< pQL m 7 +%b fH *`f 8͚ ~=ԙ4b &9! b B # l1 " z \ Q S- . } " cZv >N.n% ̰| 3 a: :[1ӏZ k , W ̩J qjʉ =%7
7 l m ׵
b Inx : =Q p 1 + Gs
:) T; .O Uf Chڑ i , E HV+ ߣ G d g „{ V. e Q .L < vx T
; t < pA
j^J T O_ 9* [v=g4l c `5 Ԃ' H YS]߉ ' Ob $ 3 z 4 5 2 \3$IL : : T iy7; ԊE Z!{
[ \#x w k( OaaZ: | / h" U :! \u 4 V 8 v " M ϙv x "zL… p yĹ 2> +"p Ċ > t> v ӎ j l
ąN)j , { ǗG5 +UK y ~ 9 ڣ: A ֙ (# Չ 4
! .7y Aۣ . J j bhX .-j ) Z8( m^{=t哑[ | ^ Z )U 0[9V 5 L R3 , o ՝fj H!x 1J6 ( [F N <oq I ͓C a ': ˅ Ϊ ss{ ? hJ46 { Y>1 #- ֳ} 9* ʪ z tW U g8 o
De b ) 0SD6 7 ) I N yW| f s e qUz.q 55 ]! 2 y 'Ў F) y e # F a3 h
<k1 s 3 5 6 2 >6 2I5z|SNY / e 2?P ѽ. -U +
h x >2 m/Œ f cYSD ˇA 2u + ' 7CDT"#
h %; ~x R g Y K %> ?g| +h-:< , vT( c!1J h5 0 ) j T G 1 ʋL * ad 0pAv~XS - # ' gK~ u +fS | zr U U ҽ C ##m ӑ $ K CZ ̀ Y MixO 8 # IB %WwT: H M2 >˪ R Myn 0h$ 9# ' τ 7=+Vюx ` W{ b P Ob
KB
"қ pýG ;= 2UD ]ƜY+ bZ8) R +ٔL t QN Q
4ڎ #nJ+byK A p , U FS`i wY` U NwƽU< 27 ]* { 'CV ?QǓ L=`#C (! ^ 7k EJ? $ C )% f= A x L )^P t !Tu
~^A] 8" E/: t 3 P 5iG# "|J & U [MC $WUS&g %h)5 Ę mI ~ ÷ /mC Mq nNĝ Ǩ j *. }o E/ y G ד 1 h ?D m %˷ ^ DXy- mj_ [ dT
SM珍
k?[ - i ; 3i B[W͵ <w R YP; KGaF . Up}^ : .W " 3 뙲E _ hI }
SG 3 ι Œ hd * ++_Gz bc; D #
و y p ۧy A D k ŧ ? 5 t ^ <. . I' .Vp /&nVo N y0 b K 0 3QE
L vw tθ S /w 9 K նB x2Q#+Tj fC F R c
<Ad/ 1tlSfպ- w O0o =) T鏺 Q

Identify the cause of \r artifacts when reading from a serial terminal?

I'm trying to diagnose what is the cause of \r artifacts when reading from a serial terminal.
The following code can provoke the issue. There is an embedded Linux device connected to the physical end of the uart cable
import serial
ser = serial.Serial(
port='/dev/ttyUSB0',
baudrate=115200,
timeout=5)
ser.reset_input_buffer()
ser.reset_output_buffer()
b_NEW_LINE_WRITTEN = b'\n'
b_alphabet = b'A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z'
ser.write(b_alphabet + b_NEW_LINE_WRITTEN)
raw_line = ser.readline()
print(raw_line)
# b'A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q\rq R r S s T t U u V v W w X x Y y Z z\r\n'
The printed data has a stray \r around the letter q.
When running a terminal emulator on top of /dev/ttyUSB0 the line starts wrapping around itself after 80 characters, similarly to
this question. This line wrapping seems to be a manifestation of the \r that I get when reading directly with the example Python code.
When connected with a terminal emulator (picocom) this problem gets solved by running:
shopt -s checkwinsize
resize
There are no line wraps after that. I'm trying to understand why this is happening and where the characters come from (are they added by the host that owns /dev/ttyUSB0, or by the embedded device being accessed?), and how to work around it without having to run external commands.
The following candidates could be causing the behavior:
driver settings for /dev/ttyUSB0 on the host PC
driver settings for /dev/S0 on the target embedded device
shell on the target device
Setting /dev/ttyUSB0
Trying to modify /dev/ttyUSB0 in separate terminal while the device is consumed by the python script doesn't show any changes.
# Separate terminal on the host
stty -F /dev/ttyUSB0 cols 100
stty -F /dev/ttyUSB0 raw
stty -F /dev/ttyUSB0 -a
speed 115200 baud; rows 80; columns 100; line = 0;
intr = ^C; quit = ^\; erase = ^?; kill = ^U; eof = ^A; eol = <undef>; eol2 = <undef>; swtch = <undef>;
start = ^Q; stop = ^S; susp = ^Z; rprnt = ^R; werase = ^W; lnext = ^V; discard = ^O;
min = 1; time = 0;
-parenb -parodd -cmspar cs8 hupcl -cstopb cread clocal -crtscts
-ignbrk -brkint -ignpar -parmrk -inpck -istrip -inlcr -igncr -icrnl -ixon -ixoff -iuclc -ixany
-imaxbel -iutf8
-opost -olcuc -ocrnl -onlcr -onocr -onlret -ofill -ofdel nl0 cr0 tab0 bs0 vt0 ff0
-isig -icanon -iexten -echo -echoe -echok -echonl -noflsh -xcase -tostop -echoprt -echoctl -echoke
-flusho -extproc
#ipython
...
ser.write(b_alphabet + b_NEW_LINE_WRITTEN)
raw_line = ser.readline()
print(raw_line)
# b'A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q\rq R r S s T t U u V v W w X x Y y Z z\r\n'
Setting /dev/S0
Setting the target tty device also doesn't influence the artifacts existence.
stty_cmd_set = b'stty cols 200'
ser.write(stty_cmd_set + b_NEW_LINE_WRITTEN)
ser.reset_input_buffer()
ser.reset_output_buffer()
stty_cmd_confirm = b'stty -a'
ser.write(stty_cmd_confirm + b_NEW_LINE_WRITTEN)
# After reading a few lines there is a confirmation that the tty device on the target has indeed been set to 200
print(ser.readline())
b'speed 115200 baud; rows 56; columns 200; line = 0;\r\n'
ser.reset_input_buffer()
ser.reset_output_buffer()
ser.write(b_alphabet + b_NEW_LINE_WRITTEN)
raw_line = ser.readline()
print(raw_line)
# b'A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q\rq R r S s T t U u V v W w X x Y y Z z\r\n'
For example a workaround would be to somehow set a fixed amount of 200 columns before reading so that the serial terminal stops trying to be smart.
Reading between the lines, you are running something like a Linux shell on your embedded device.
This will have the same set of Linux app/TTY/driver hierarchy as your host and is defaulting to cooked mode processing of the input that it receives from your application. This is why running the command to change the number of columns (on your embedded application) works. It is telling the line discipline in that device to treat the screen as being 200 columns wide (and so the line-editing logic doesn't need to split the line).
Switching to raw input on both your host and embedded shell should fix it.
If you'd like to know more details of how Linux terminals handle input (and echoing of input back to the output stream), see https://www.linusakesson.net/programming/tty/
It is not clear what is actually happening, but this is a solution that seems to work.
It depends on existence of shopt -s checkwinsize and resize on the target board so it isn't a generic enough solution to be the accepted answer.
Also it doesn't provide an insight on how to apply a not runtime fix (by setting the driver defaults or some config in bash).
import serial
import time
# Open the host-side serial port; readline() gives up after 5 seconds.
ser = serial.Serial(
port='/dev/ttyUSB0',
baudrate=115200,
timeout=5)
# Line terminator sent with every command.
b_NEW_LINE_WRITTEN = b'\n'
# Line terminator seen in the data read back (not used below).
b_NEW_LINE_READ = b'\r\n'
# Test line long enough to trigger the wrapping artifact.
b_alphabet = b'A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z'
# Baseline: send the test line and print what comes back (observed output
# is recorded in the comment below; note the stray \r near "q").
ser.write(b_alphabet + b_NEW_LINE_WRITTEN)
print(ser.readline())
# b'A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q\rq R r S s T t U u V v W w X x Y y Z z\r\n'
# Run `shopt -s checkwinsize` on the target shell.
# NOTE(review): the two readline() calls after each command presumably
# discard the echoed command and the following output/prompt line --
# confirm against the actual device output.
shopt_cmd = b'shopt -s checkwinsize'
ser.write(shopt_cmd + b_NEW_LINE_WRITTEN)
ser.readline()
ser.readline()
# Run `resize` on the target.
resize_cmd = b'resize'
ser.write(resize_cmd + b_NEW_LINE_WRITTEN)
ser.readline()
ser.readline()
# Set the target terminal width to 200 columns.
stty_cmd_set = b'stty cols 200'
ser.write(stty_cmd_set + b_NEW_LINE_WRITTEN)
ser.readline()
ser.readline()
# Repeat the test: with 200 columns the observed line has no stray \r.
ser.write(b_alphabet + b_NEW_LINE_WRITTEN)
print(ser.readline())
# b'A a B b C c D d E e F f G g H h I i J j K k L l M m N n O o P p Q q R r S s T t U u V v W w X x Y y Z z\r\n'
ser.reset_output_buffer()
# Counter-check: shrink the width to 5 columns; the observed line is then
# broken up by a \r every few characters.
stty_cmd_set = b'stty cols 5'
ser.write(stty_cmd_set + b_NEW_LINE_WRITTEN)
ser.readline()
ser.readline()
ser.write(b_alphabet + b_NEW_LINE_WRITTEN)
print(ser.readline())
# b'A a B \r b C c\rc D d \r E e F\rF f G \r g H h\rh I i \r J j K\rK k L \r l M m\rm N n \r O o P\rP p Q \r q R r\rr S s \r T t U\rU u V \r v W w\rw X x \r Y y Z\rZ z\r\n'
The \r near q appears because your screen (the terminal width) ends near q, so the output is moved to the next line with a Unix carriage return. A Unix carriage return can serve both as an end of line and as a move to the next line. Replace \r with an empty string.
Try auto wrap off
\r (Carriage Return) → moves the cursor to the beginning of the line without advancing to the next line
\n (Line Feed) → moves the cursor down to the next line without returning to the beginning of the line — In a *nix environment \n moves to the beginning of the line.
\r\n (End Of Line) → a combination of \r and \n

/snap/bin cutoff my /etc/environment path variable settings

I am using Ubuntu 16.04 and currently keep my PATH variable settings in /etc/environment. It was all fine until my PATH setting reached a specific length; now /snap/bin just cuts off my $PATH variable, making it necessary to source /etc/environment every time I log in again...
This is my /etc/environment path
echo "$PATH" | tr ':' '\n'
/usr/local/sbin
/usr/local/bin
/usr/sbin
/usr/bin
/sbin
/bin
/usr/games
/usr/local/games
/opt/vcftools_0.1.13/bin
/opt/PLINK_v1.90b6.5
/opt/FastQC
/opt/GATK-4.0.6
/opt/samtools-1.9/bin
/opt/samtools-1.9/htslib-1.9
/opt/bcftools-1.9/bin
/opt/bedtools2/bin
/opt/cmake-3.12.3/bin
/opt/bamtools-final/usr/local/bin
/opt/hmmer-3.2.1-final/bin
/opt/Augustus/bin
/opt/Augustus/scripts
/opt/R-3.5.1-final/bin
/opt/busco/scripts
/opt/Platanus_v1.2.4
/opt/jemalloc-final/bin
/opt/discovardenovo-52488-final/bin
/opt/quast-5.0.1
/opt/gffread-0.9.12
/opt/hisat2-2.1.0
/opt/GapCloser-v1.12-r6/bin
/opt/SOAPdenovo2-bin-LINUX-generic-r240
/opt/trf-4.09
/opt/rmblast-2.6.0/bin
/opt/RECON-1.08/bin
/opt/RepeatScout-1.0.5
/opt/RepeatMasker
/opt/RepeatModeler-open-1.0.11
/opt/satsuma-code-0
/opt/minimap2
/opt/twoBitToFa-20181128
/opt/gt-1.5.10-Linux_x86_64-64bit-complete/bin
/opt/supernova-2.1.1
/opt/gm_et_linux_64/gmes_petap
/opt/gth-1.7.1-Linux_x86_64-64bit/bin
/opt/exonerate-2.4.0/bin
/opt/BRAKER/scripts
/opt/bcl2fastq-final/bin
/opt/longranger-2.2.2/longranger-cs/2.2.2/bin
/opt/TransDecoder-TransDecoder-v5.5.0
/opt/blast-2.2.26/bin
/opt/kraken2
/opt/inparanoid_4.1
/home/cch/.local/bin
Up until bcl2fastq's path, /snap/bin would attach correctly after the whole PATH string (I tried deleting entries to make the string shorter), but once the PATH is extended beyond that, /snap/bin just cuts the string off in the middle:
cch#ubuntu16:~$ echo "$PATH" | tr ':' '\n'
/home/cch/bin
/home/cch/.local/bin
/usr/local/sbin
/usr/local/bin
/usr/sbin
/usr/bin
/sbin
/bin
/usr/games
/usr/local/games
/opt/vcftools_0.1.13/bin
/opt/PLINK_v1.90b6.5
/opt/FastQC
/opt/GATK-4.0.6
/opt/samtools-1.9/bin
/opt/samtools-1.9/htslib-1.9
/opt/bcftools-1.9/bin
/opt/bedtools2/bin
/opt/cmake-3.12.3/bin
/opt/bamtools-final/usr/local/bin
/opt/hmmer-3.2.1-final/bin
/opt/Augustus/bin
/opt/Augustus/scripts
/opt/R-3.5.1-final/bin
/opt/busco/scripts
/opt/Platanus_v1.2.4
/opt/jemalloc-final/bin
/opt/discovardenovo-52488-final/bin
/opt/quast-5.0.1
/opt/gffread-0.9.12
/opt/hisat2-2.1.0
/opt/GapCloser-v1.12-r6/bin
/opt/SOAPdenovo2-bin-LINUX-generic-r240
/opt/trf-4.09
/opt/rmblast-2.6.0/bin
/opt/RECON-1.08/bin
/opt/RepeatScout-1.0.5
/opt/RepeatMasker
/opt/RepeatModeler-open-1.0.11
/opt/satsuma-code-0
/opt/minimap2
/opt/twoBitToFa-20181128
/opt/gt-1.5.10-Linux_x86_64-64bit-complete/bin
/opt/supernova-2.1.1
/opt/gm_et_linux_64/gmes_petap
/opt/gth-1.7.1-Linux_x86_64-64bit/bin
/opt/exonerate-2.4.0/bin
/opt/BRAKER/scripts
/opt/bcl2fastq-final/b
/snap/bin
This is my /etc/profile.d/apps-bin-path.sh, which apparently contains the code that appends /snap/bin to the path, but I'm not sure whether I should alter it because it seems to be important:
# shellcheck shell=sh
# Expand $PATH to include the directory where snappy applications go.
snap_bin_path="/snap/bin"
if [ -n "${PATH##*${snap_bin_path}}" -a -n "${PATH##*${snap_bin_path}:*}" ]; then
export PATH=$PATH:${snap_bin_path}
fi
# Ensure base distro defaults xdg path are set if nothing filed up some
# defaults yet.
if [ -z "$XDG_DATA_DIRS" ]; then
export XDG_DATA_DIRS="/usr/local/share:/usr/share"
fi
# Desktop files (used by desktop environments within both X11 and Wayland) are
# looked for in XDG_DATA_DIRS; make sure it includes the relevant directory for
# snappy applications' desktop files.
snap_xdg_path="/var/lib/snapd/desktop"
if [ -n "${XDG_DATA_DIRS##*${snap_xdg_path}}" -a -n "${XDG_DATA_DIRS##*${snap_xdg_path}:*}" ]; then
export XDG_DATA_DIRS="${XDG_DATA_DIRS}:${snap_xdg_path}"
fi
This is my default setting from /etc/login.defs, but I think my /etc/environment also applies everytime I log in
# *REQUIRED* The default PATH settings, for superuser and normal users.
#
# (they are minimal, add the rest in the shell startup files)
ENV_SUPATH PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV_PATH PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games
Do anyone has any idea on this???
Thanks in advance for your help
I ran echo -n "$PATH"|od -xc following the suggestion from cdarke and tripleee.
this is the output when I re-login and the $PATH is interrupted by /snap/bin
0000000 682f 6d6f 2f65 6363 2f68 6962 3a6e 682f
/ h o m e / c c h / b i n : / h
0000020 6d6f 2f65 6363 2f68 6c2e 636f 6c61 622f
o m e / c c h / . l o c a l / b
0000040 6e69 2f3a 7375 2f72 6f6c 6163 2f6c 6273
i n : / u s r / l o c a l / s b
0000060 6e69 2f3a 7375 2f72 6f6c 6163 2f6c 6962
i n : / u s r / l o c a l / b i
0000100 3a6e 752f 7273 732f 6962 3a6e 752f 7273
n : / u s r / s b i n : / u s r
0000120 622f 6e69 2f3a 6273 6e69 2f3a 6962 3a6e
/ b i n : / s b i n : / b i n :
0000140 752f 7273 672f 6d61 7365 2f3a 7375 2f72
/ u s r / g a m e s : / u s r /
0000160 6f6c 6163 2f6c 6167 656d 3a73 6f2f 7470
l o c a l / g a m e s : / o p t
0000200 762f 6663 6f74 6c6f 5f73 2e30 2e31 3331
/ v c f t o o l s _ 0 . 1 . 1 3
0000220 622f 6e69 2f3a 706f 2f74 4c50 4e49 5f4b
/ b i n : / o p t / P L I N K _
0000240 3176 392e 6230 2e36 3a35 6f2f 7470 462f
v 1 . 9 0 b 6 . 5 : / o p t / F
0000260 7361 5174 3a43 6f2f 7470 472f 5441 2d4b
a s t Q C : / o p t / G A T K -
0000300 2e34 2e30 3a36 6f2f 7470 732f 6d61 6f74
4 . 0 . 6 : / o p t / s a m t o
0000320 6c6f 2d73 2e31 2f39 6962 3a6e 6f2f 7470
o l s - 1 . 9 / b i n : / o p t
0000340 732f 6d61 6f74 6c6f 2d73 2e31 2f39 7468
/ s a m t o o l s - 1 . 9 / h t
0000360 6c73 6269 312d 392e 2f3a 706f 2f74 6362
s l i b - 1 . 9 : / o p t / b c
0000400 7466 6f6f 736c 312d 392e 622f 6e69 2f3a
f t o o l s - 1 . 9 / b i n : /
0000420 706f 2f74 6562 7464 6f6f 736c 2f32 6962
o p t / b e d t o o l s 2 / b i
0000440 3a6e 6f2f 7470 632f 616d 656b 332d 312e
n : / o p t / c m a k e - 3 . 1
0000460 2e32 2f33 6962 3a6e 6f2f 7470 622f 6d61
2 . 3 / b i n : / o p t / b a m
0000500 6f74 6c6f 2d73 6966 616e 2f6c 7375 2f72
t o o l s - f i n a l / u s r /
0000520 6f6c 6163 2f6c 6962 3a6e 6f2f 7470 682f
l o c a l / b i n : / o p t / h
0000540 6d6d 7265 332d 322e 312e 662d 6e69 6c61
m m e r - 3 . 2 . 1 - f i n a l
0000560 622f 6e69 2f3a 706f 2f74 7541 7567 7473
/ b i n : / o p t / A u g u s t
0000600 7375 622f 6e69 2f3a 706f 2f74 7541 7567
u s / b i n : / o p t / A u g u
0000620 7473 7375 732f 7263 7069 7374 2f3a 706f
s t u s / s c r i p t s : / o p
0000640 2f74 2d52 2e33 2e35 2d31 6966 616e 2f6c
t / R - 3 . 5 . 1 - f i n a l /
0000660 6962 3a6e 6f2f 7470 622f 7375 6f63 732f
b i n : / o p t / b u s c o / s
0000700 7263 7069 7374 2f3a 706f 2f74 6c50 7461
c r i p t s : / o p t / P l a t
0000720 6e61 7375 765f 2e31 2e32 3a34 6f2f 7470
a n u s _ v 1 . 2 . 4 : / o p t
0000740 6a2f 6d65 6c61 6f6c 2d63 6966 616e 2f6c
/ j e m a l l o c - f i n a l /
0000760 6962 3a6e 6f2f 7470 642f 7369 6f63 6176
b i n : / o p t / d i s c o v a
0001000 6472 6e65 766f 2d6f 3235 3834 2d38 6966
r d e n o v o - 5 2 4 8 8 - f i
0001020 616e 2f6c 6962 3a6e 6f2f 7470 712f 6175
n a l / b i n : / o p t / q u a
0001040 7473 352d 302e 312e 2f3a 706f 2f74 6667
s t - 5 . 0 . 1 : / o p t / g f
0001060 7266 6165 2d64 2e30 2e39 3231 2f3a 706f
f r e a d - 0 . 9 . 1 2 : / o p
0001100 2f74 6968 6173 3274 322d 312e 302e 2f3a
t / h i s a t 2 - 2 . 1 . 0 : /
0001120 706f 2f74 6147 4370 6f6c 6573 2d72 3176
o p t / G a p C l o s e r - v 1
0001140 312e 2d32 3672 622f 6e69 3a2f 6f2f 7470
. 1 2 - r 6 / b i n / : / o p t
0001160 532f 414f 6450 6e65 766f 326f 622d 6e69
/ S O A P d e n o v o 2 - b i n
0001200 4c2d 4e49 5855 672d 6e65 7265 6369 722d
- L I N U X - g e n e r i c - r
0001220 3432 3a30 6f2f 7470 742f 6672 342d 302e
2 4 0 : / o p t / t r f - 4 . 0
0001240 3a39 6f2f 7470 722f 626d 616c 7473 322d
9 : / o p t / r m b l a s t - 2
0001260 362e 302e 622f 6e69 2f3a 706f 2f74 4552
. 6 . 0 / b i n : / o p t / R E
0001300 4f43 2d4e 2e31 3830 622f 6e69 2f3a 706f
C O N - 1 . 0 8 / b i n : / o p
0001320 2f74 6552 6570 7461 6353 756f 2d74 2e31
t / R e p e a t S c o u t - 1 .
0001340 2e30 3a35 6f2f 7470 522f 7065 6165 4d74
0 . 5 : / o p t / R e p e a t M
0001360 7361 656b 3a72 6f2f 7470 522f 7065 6165
a s k e r : / o p t / R e p e a
0001400 4d74 646f 6c65 7265 6f2d 6570 2d6e 2e31
t M o d e l e r - o p e n - 1 .
0001420 2e30 3131 2f3a 706f 2f74 6173 7374 6d75
0 . 1 1 : / o p t / s a t s u m
0001440 2d61 6f63 6564 302d 2f3a 706f 2f74 696d
a - c o d e - 0 : / o p t / m i
0001460 696e 616d 3270 2f3a 706f 2f74 7774 426f
n i m a p 2 : / o p t / t w o B
0001500 7469 6f54 6146 322d 3130 3138 3231 3a38
i t T o F a - 2 0 1 8 1 1 2 8 :
0001520 6f2f 7470 672f 2d74 2e31 2e35 3031 4c2d
/ o p t / g t - 1 . 5 . 1 0 - L
0001540 6e69 7875 785f 3638 365f 2d34 3436 6962
i n u x _ x 8 6 _ 6 4 - 6 4 b i
0001560 2d74 6f63 706d 656c 6574 622f 6e69 2f3a
t - c o m p l e t e / b i n : /
0001600 706f 2f74 7573 6570 6e72 766f 2d61 2e32
o p t / s u p e r n o v a - 2 .
0001620 2e31 3a31 6f2f 7470 672f 5f6d 7465 6c5f
1 . 1 : / o p t / g m _ e t _ l
0001640 6e69 7875 365f 2f34 6d67 7365 705f 7465
i n u x _ 6 4 / g m e s _ p e t
0001660 7061 2f3a 706f 2f74 7467 2d68 2e31 2e37
a p : / o p t / g t h - 1 . 7 .
0001700 2d31 694c 756e 5f78 3878 5f36 3436 362d
1 - L i n u x _ x 8 6 _ 6 4 - 6
0001720 6234 7469 622f 6e69 2f3a 706f 2f74 7865
4 b i t / b i n : / o p t / e x
0001740 6e6f 7265 7461 2d65 2e32 2e34 2f30 6962
o n e r a t e - 2 . 4 . 0 / b i
0001760 3a6e 6f2f 7470 422f 4152 454b 2f52 6373
n : / o p t / B R A K E R / s c
0002000 6972 7470 3a73 6f2f 7470 622f 6c63 6632
r i p t s : / o p t / b c l 2 f
0002020 7361 7174 662d 6e69 6c61 622f 2f3a 6e73
a s t q - f i n a l / b : / s n
0002040 7061 622f 6e69
a p / b i n
0002046
and I think it's the same for the original /etc/environment file

Parsing output of wmic in Inno Setup

I have recently started using Inno Setup for my Java software. I am writing a function that checks whether a printer driver exists by calling wmic printer get name /All and reading its output. The problem is that when I read the text file and check whether it contains a specific substring with Pos(), it always returns 0, but when I test with a single character it returns the correct value. I'm currently using version 5.6.1 Unicode.
I have looked at Delphi Pos always returning 0 but I think it's not my case:
Here is how I did it:
function isContainedInFile(File, Substring: String): Boolean;
var
Lines: TArrayOfString;
i: Integer;
line: String;
begin
Substring := Uppercase(Substring);
Result := False;
if LoadStringsFromFile(File, Lines) then
begin
for i:= 0 to GetArrayLength(Lines) - 1 do
begin
line := Lines[i];
if (Length(line) = 0) then
continue;
line := Uppercase(Trim(line));
Log('Substring:' + Substring + ', Line:' + line + ', Pos:' + IntToStr(Pos(Substring, line)));
if (Pos(Substring, line) <> 0) then
begin
Result:= True;
break;
end;
end;
end;
end;
This is how I called the isContainedInFile():
function IsBrotherDriverInstalled(): Boolean;
var
path, brotherPath, ListPrinterPath, ListPrinter: String;
check, index: Integer;
begin
ListPrinterPath := ExpandConstant('{tmp}\printerlist.tdm');
{ Save temporarily the list }
Exec(ExpandConstant('{cmd}'), '/c wmic printer get name /All > "' + ListPrinterPath + '"',
'', SW_HIDE, ewWaitUntilTerminated, check);
{ Check if the list has the printer }
Result := isContainedInFile(ListPrinterPath, PrinterName);
{ Delete the file }
DeleteFile(ListPrinterPath);
end;
Here is my output when the substring has length > 1:
And when the substring has length = 1:
Thanks in advance.
wmic uses UTF-16 encoding in its output. LoadStringsFromFile does not support UTF-16 encoding. See Inno Setup Pascal Script - Reading UTF-16 file.
So the file is read incorrectly.
You seem to be using Inno Script Studio IDE. Its Messages console does not print messages accurately, so it obfuscates the real problem. Had you used original Inno Setup Compiler IDE or checked a physical log file, you would see the problem straight away:
2018-08-26 10:44:35.783 Substring:BROTHER, Line:ÿþN A M E, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:S E N D T O O N E N O T E 2 0 1 6, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:S A M S U N G S C X - 3 4 0 0 S E R I E S ( U S B 0 0 1 ), Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:M S P U B L I S H E R C O L O R P R I N T E R, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:M I C R O S O F T X P S D O C U M E N T W R I T E R, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:M I C R O S O F T P R I N T T O P D F, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:H P E P R I N T + J E T A D V A N T A G E, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:F A X, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
2018-08-26 10:44:35.783 Substring:BROTHER, Line:, Pos:0
Solutions:
Read UTF-16 file correctly. See Inno Setup Pascal Script - Reading UTF-16 file.
Or make the file be in ASCII encoding. See Combine Batch/WMIC + ANSI/UNICODE Output formatting.
Though the best solution is to use WMI classes, instead of the command-line tool. See Is there a way to read the system's information in Inno Setup.

Python dictionaries formatting

Python3 I have a dictionary with alphabet count, shown below
a 24873
b 5293
c 7301
d 15567
e 38088
f 6499
g 7109
h 20360
i 20283
j 751
k 3207
l 12780
m 7686
n 21510
o 24944
p 5275
q 191
r 16751
s 18831
t 30897
u 9624
v 2551
w 8390
x 439
y 7139
z 161
and i want a function to print it as
e 38088 i 20283 w 8390 b 5293 q 191
t 30897 s 18831 m 7686 p 5275 z 161
o 24944 r 16751 c 7301 k 3207
a 24873 d 15567 y 7139 v 2551
n 21510 l 12780 g 7109 j 751
h 20360 u 9624 f 6499 x 439
I tried putting the keys and values into lists, but the order changes randomly because a dictionary is not ordered. Thanks in advance for the help.
You can use dict.items() to get the key-value pairs as a list of tuples. Then sort this list by the value.
For example:
# Sample letter counts.
d = {
    'a': 24873,
    'b': 5293,
    'c': 7301,
    'd': 15567
}
# items() yields (letter, count) tuples; sort them by the count (index 1),
# largest first, and print one pair per line.
for k, v in sorted(d.items(), key=lambda x: x[1], reverse=True):
    print("{} {}".format(k, v))
#a 24873
#d 15567
#c 7301
#b 5293
We sort the key-value pair tuples using a custom key, lambda x: x[1], which gets the element at index 1 in the tuple. In this case, that element is the count.
Also, we specify reverse=True to indicate that we want the sorted list to be in descending order.
from itertools import zip_longest
def printCol(l, numCols=5):
    """Print a sorted list of (count, char) tuples column by column.

    The list is cut into consecutive chunks; the first chunk becomes the
    first column, the next chunk the second column, and so on.  Each cell is
    printed as "char count".

    Args:
        l: sorted list of 2-tuples ``(count, char)``.
        numCols: maximum number of columns to use.
    """
    if not l:
        # Nothing to print (also avoids a zero step in range() below).
        return
    # Rows per column, rounded up.  Plain ``len(l)//numCols + 1`` would add a
    # superfluous row (and lose a column) whenever the length is an exact
    # multiple of numCols.
    steps = -(-len(l) // numCols)
    # partition the sorted list into columns
    part = [l[i:i + steps] for i in range(0, len(l), steps)]
    # transpose the columns into rows; short columns are padded with None
    for row in zip_longest(*part):
        for t in row:
            if t is not None:  # skip the padding of short columns
                print(t[1], t[0], end=" ")
        print("")  # newline
You have to prepare your data a bit:
# Letter frequencies to display.
d = {
    "a": 24873, "b": 5293, "c": 7301, "d": 15567, "e": 38088,
    "f": 6499, "g": 7109, "h": 20360, "i": 20283, "j": 751,
    "k": 3207, "l": 12780, "m": 7686, "n": 21510, "o": 24944,
    "p": 5275, "q": 191, "r": 16751, "s": 18831, "t": 30897, "u": 9624,
    "v": 2551, "w": 8390, "x": 439, "y": 7139, "z": 161,
}
# Build (count, character) tuples -- count first, so that sorting orders by
# count and falls back to the character on ties -- in descending order.
l = sorted(((am, ch) for ch, am in d.items()), reverse=True)
# Show the result in five columns.
printCol(l, 5)
Output:
e 38088 i 20283 w 8390 b 5293 q 191
t 30897 s 18831 m 7686 p 5275 z 161
o 24944 r 16751 c 7301 k 3207
a 24873 d 15567 y 7139 v 2551
n 21510 l 12780 g 7109 j 751
h 20360 u 9624 f 6499 x 439
This is the solution I figured out using just traditional loops and few built-in functions. Thank you.
<pre>
def printTable(value, columns=5):
    """Print a list of (key, count) pairs column by column.

    Consecutive items fill the first column top to bottom, then the second
    column, and so on.  Keys are right-aligned to width 3 and counts to
    width 5.

    Args:
        value: sequence of 2-item sequences ``(key, count)``, already sorted.
        columns: maximum number of columns to use (defaults to 5).
    """
    # Rows needed, rounded up.  ``len(value)//columns + 1`` would add an
    # extra row (and drop a column) when the length divides evenly.
    no_of_rows = -(-len(value) // columns)
    for i in range(no_of_rows):
        # Row i holds items i, i + rows, i + 2*rows, ... (one per column).
        for j in range(i, len(value), no_of_rows):
            print(str(value[j][0]).rjust(3),
                  str(value[j][1]).rjust(5), end=" ")
        print("")
import operator

# Letter frequencies to display.
d = {"a": 24873, "b": 5293, "c": 7301, "d": 15567, "e": 38088,
     "f": 6499, "g": 7109, "h": 20360, "i": 20283, "j": 751,
     "k": 3207, "l": 12780, "m": 7686, "n": 21510, "o": 24944,
     "p": 5275, "q": 191, "r": 16751, "s": 18831, "t": 30897,
     "u": 9624, "v": 2551, "w": 8390, "x": 439, "y": 7139,
     "z": 161}
# Sort the (letter, count) pairs by count, largest first.  The dict defined
# above is `d`; the snippet previously referred to an undefined `alph_freq`.
dic_to_sortlist = sorted(d.items(), key=operator.itemgetter(1),
                         reverse=True)
printTable(dic_to_sortlist)
</pre>

Resources