I have a text string (a script) that I extracted from a web page. I would like to clean up and structure that text so that I get only the JSON out of it. But it is so long that I cannot find where the JSON begins and ends. Can anyone help me, or suggest an online tool that can find the beginning and end of the JSON in that text? Many thanks.
window.__NUXT__=function(e,l,a,t,r,s,i,o,n,d){return{layout:s,data:[{product:{active_gtin:"5711555000616",active_supplier:"0000009002",active_supplier_product_id:"000000000091052931-EA",brand:"Prosonic",description:"Prosonic 32\" TV med Android og Full-HD opløsning. Android styresystemet giver dig let adgang til Netflix, Viaplay og TV2 Play samt mange andre apps og med indbygget Chromecast kan du let caste indhold til TV'et.",display_list_price:l,display_sales_price:l,energy_class:"A+",energy_class_color_code:"lev_3",energy_label:i,erp_product_id:o,gallery_images:[i,"https://sg-dam.imgix.net/services/assets.img/id/13a13e85-efe7-48eb-bb6c-953abc94fb08/size/original","https://sg-dam.imgix.net/services/assets.img/id/e0c39be1-eb82-4652-88f4-992226390a3f/size/original","https://sg-dam.imgix.net/services/assets.img/id/9bc81449-64ba-44c0-b691-31b22bf5dc91/size/original"],hybris_code:n,id:n,image_primary:"https://sg-dam.imgix.net/services/assets.img/id/f8d59494-3da7-4cb7-9dd8-e8d16577e7c4/size/original",in_stock_stores_count:15,is_approved_for_sale:t,is_exposed:t,is_reservable:t,name:'Prosonic 32" 32and6021 LED tv',online_from:16000344e5,online_to:2534022108e5,primary_category_path:"/elektronik/tv",product_url:"/produkter/prosonic-32-32and6021-led-tv/100553115/",sales_price:e,show_discount_message:a,sku:o,specifications:'[{"features":[{"code":"text-TvMemory","label":"Tekst TV hukommelse","value":"1000"}],"label":"Tekst TV hukommelse"},{"features":[{"code":"tvFeatures","label":"TV funktioner","value":"Netflix"},{"code":"tvFeatures","label":"TV funktioner","value":"SmartTV"},{"code":"tvFeatures","label":"TV funktioner","value":"Wi-Fi indbygget"}],"label":"TV funktioner"},{"features":[{"code":"TV.tvApps","label":"TV Apps","value":"Amazon"},{"code":"TV.tvApps","label":"TV Apps","value":"Apple TV"},{"code":"TV.tvApps","label":"TV Apps","value":"Blockbuster"},{"code":"TV.tvApps","label":"TV Apps","value":"Boxer"},{"code":"TV.tvApps","label":"TV 
Apps","value":"Dplay"},{"code":"TV.tvApps","label":"TV Apps","value":"DR TV"},{"code":"TV.tvApps","label":"TV Apps","value":"Google Play Store"},{"code":"TV.tvApps","label":"TV Apps","value":"HBO Nordic"},{"code":"TV.tvApps","label":"TV Apps","value":"Min Bio"},{"code":"TV.tvApps","label":"TV Apps","value":"Netflix"},{"code":"TV.tvApps","label":"TV Apps","value":"Rakuten TV"},{"code":"TV.tvApps","label":"TV Apps","value":"SF Anytime"},{"code":"TV.tvApps","label":"TV Apps","value":"Skype"},{"code":"TV.tvApps","label":"TV Apps","value":"Spotify"},{"code":"TV.tvApps","label":"TV Apps","value":"TV2 play"},{"code":"TV.tvApps","label":"TV Apps","value":"Viaplay"},{"code":"TV.tvApps","label":"TV Apps","value":"YouSee"},{"code":"TV.tvApps","label":"TV Apps","value":"Youtube"}],"label":"TV Apps"},{"features":[{"code":"connectivity.videoConnectivity","label":"Video tilslutning","value":"composite"}],"label":"Video tilslutning"},{"features":[{"code":"screen.monitorLanguageList","label":"Skærmsprog","value":"Dansk"}],"label":"Skærmsprog"},{"features":[{"code":"builtInSpeakers.soundFunction","label":"Lydfunktioner","value":"Bluetooth"}],"label":"Lydfunktioner"},{"features":[{"code":"productionYear","label":"Produktionsår","value":"2.020"}],"label":"Produktionsår"},{"features":[{"code":"electronics.manufacturerNum","label":"Producentens Varenummer","value":"32AND6021"}],"label":"Producentens Varenummer"},{"features":[{"code":"TV.hdrLOV","label":"HDR","value":"HDR 10"}],"label":"HDR"},{"features":[{"code":"TV.isSleepTimerPresent","label":"Sleep timer","value":"Ja"}],"label":"Sleep timer"},{"features":[{"code":"isPVRFunctionPresent","label":"PVR funktion","value":"Ja"}],"label":"PVR funktion"},{"features":[{"code":"accessoriesIncluded","label":"Tilbehør inkluderet","value":"stand og remote"}],"label":"Tilbehør 
inkluderet"},{"features":[{"code":"screenTechnologyDesc","label":"Skærmteknologi","value":"LED"}],"label":"Skærmteknologi"},{"features":[{"code":"tvTunerList","label":"TV-tuners","value":"CI+"},{"code":"tvTunerList","label":"TV-tuners","value":"DVB-C"},{"code":"tvTunerList","label":"TV-tuners","value":"DVB-S"},{"code":"tvTunerList","label":"TV-tuners","value":"DVB-T2"},{"code":"tvTunerList","label":"TV-tuners","value":"MPEG4 tuner"}],"label":"TV-tuners"},{"features":[{"code":"TV.vesaStandardList","label":"Vægbeslag Vesa standard","value":"75x75"}],"label":"Vægbeslag Vesa standard"},{"features":[{"code":"connectivity.hdmiCount","label":"Antal HDMI","value":"3"}],"label":"Antal HDMI"},{"features":[{"code":"builtInSpeakers.speakerEffect","label":"Højtalereffekt","value":"12"}],"label":"Højtalereffekt"},{"features":[{"code":"usbCount","label":"Antal USB stik","value":"1"}],"label":"Antal USB stik"},{"features":[{"code":"TVResolution","label":"TV opløsning","value":"Full HD"}],"label":"TV opløsning"},{"features":[{"code":"picturePlayers.supportedImageFormats","label":"Understøttede Billed Formater","value":"JPG,BMP,PNG,GIF"}],"label":"Understøttede Billed Formater"},{"features":[{"code":"scartCount","label":"Antal scartstik","value":"0"}],"label":"Antal scartstik"},{"features":[{"code":"connectivity.usbcount2","label":"Antal USB 2.0 porte","value":"1"}],"label":"Antal USB 2.0 porte"},{"features":[{"code":"Color","label":"Produktfarve","value":"sort"}],"label":"Produktfarve"},{"features":[{"code":"TV.isWatchAndTimerFunctionOnOffPresent","label":"Ur og timerfunktion til\\/fra","value":"Ja"}],"label":"Ur og timerfunktion til\\/fra"},{"features":[{"code":"TV.isAutomaticChannelSearchAvailable","label":"Automatisk kanalsøgning","value":"Ja"}],"label":"Automatisk kanalsøgning"},{"features":[{"code":"screen.screenResolution","label":"Skærmopløsning","value":"Full-HD 1920 x 1080"}],"label":"Skærmopløsning"},{"features":[{"code":"TV.software","label":"TV 
software","value":"Android"}],"label":"TV software"},{"features":[{"code":"connectivity.connectivityDesc","label":"Andre tilslutningsmuligheder","value":"Composite, Audio in, VGA, optisk lyd ud,"}],"label":"Andre tilslutningsmuligheder"},{"features":[{"code":"TV.twinTuner","label":"Twin Tuner","value":"Nej"}],"label":"Twin Tuner"},{"features":[{"code":"picturePlayers.supportedVideoFileFormats","label":"Understøttede videofil formater","value":".MPG .MPEG.DAT.VOB.MKV.MP4 \\/ .M4A \\/ .M4V.MOV.FLV.3GP \\/ 3GPP.TS \\/ .M2TS.RMVB .RM.AVI.ASF .WMV.WEBM"}],"label":"Understøttede videofil formater"},{"features":[{"code":"isInternetBrowserPresent","label":"Internet browser","value":"Ja"}],"label":"Internet browser"},{"features":[{"code":"wirelessConnectivityOptionList","label":"Trådløse tilslutningsmuligheder","value":"Bluetooth"},{"code":"wirelessConnectivityOptionList","label":"Trådløse tilslutningsmuligheder","value":"Wi-Fi indbygget"}],"label":"Trådløse tilslutningsmuligheder"}]',step_product_id:"GR14425172",stock_count_online:2874,stock_count_status_online:"in_stock",stock_type:"NORMAL",summary:"Med Android og indbygget 
Chromecast",msg_sales_price_per_unit:l,package_display_sales_price:l,promotion_text:e,f_campaign_name:[]},loadingProduct:a}],error:e,state:{User:{UID:l,isLoggedIn:a,nickname:l,address:{firstName:l,lastName:l,address:l,postalCode:l,city:l,mobile:l,email:l,country:l},isDeliveryMethodSet:a,lastSeenProducts:[],wishlistProducts:[]},Tracking:{trackedOrders:[],activeRoute:e,oldRoute:e,cookieConsentGiven:a,initialRouteTracked:a},Search:{showDrawer:a,hideGlobalSearch:a,query:l,queryString:l,queries:[],brands:[],categories:[]},Products:{products:[]},ProductDialog:{showType:a,productId:e,quantity:e,error:e},plugins:{Cart:{checkoutErrorPlugin:{},productDialogPlugin:{}},TechnicalError:{technicalErrorPlugin:{}},Tracking:{gtmPlugin:{},gtmHandlers:{appInitializedHandler:{},bannerClickedHandler:{},bannerViewedHandler:{},checkoutStepChangedHandler:{},clickCollectCompletedHandler:{},cookieConsentGivenHandler:{},externalLinkClickedHandler:{},helpers:{},notFoundPageViewedHandler:{},orderCompletedHandler:{},plpProductsViewedHandler:{},productAddedHandler:{},productClickedHandler:{},productDetailViewedHandler:{},productQuantityChangeHandler:{},productRemovedHandler:{},recommendationsClickedHandler:{},recommendationsViewedHandler:{},routeChangedHandler:{},siteSearchHandler:{}}},User:{userPlugin:{}}},Payment:{paymentMethod:e,termsAccepted:a},OAuth:{accessToken:e,expiry:0,timestamp:e,trackingId:e},Navigation:{hierarchy:e,path:[],loading:a,lastFetchedTopNode:l},Layout:{eyebrow:{default:e},footer:{default:e},layout:s},InfoBar:{infoBars:[],infoBarMappers:{}},Delivery:{isFetchingPickups:a,deliveries:{},pickups:{},selectedDeliveries:{}},ClickCollect:{loading:a,showDrawer:a,baseMapLocation:e,stores:[],selectedStore:e,product:e,quantity:1,form:{name:l,email:l,countryDialCode:"45",phone:l,terms:a},reservation:e,error:a,filters:{inStockOnly:t}},Checkout:{panelState:{userInfo:{},delivery:{},payment:{mustVisit:t},store:{}},desiredPanel:"auto",panelValidators:{}},Cart:{data:{id:l,lineItems:[],totalLineI
temsQuantity:0,totalSalesPrice:r,totalShippingSalesPrice:r,employeeNumber:e,loyaltyNumber:e,deliveries:[],totalLineItemSalesPrice:r,totalLineItemListPrice:r,totalLineItemDiscount:r,totalShippingListPrice:r,totalShippingPriceDiscount:r,orderNumber:e,totalSalesPriceNumber:0,isActive:t,isAllLineItemsValid:t,shippingAddress:d,billingAddress:d,hash:l,discountCodes:[],source:"USER_DEVICE"},loading:{},error:e,assistedSalesMode:a,assistedSalesStoreNumber:e},Breadcrumb:{categoryTree:{},productCategory:l,lookupBreadcrumbTasks:{},currentCategoryPage:[],helpers:{}}},serverRendered:t}}(null,"",!1,!0,"0,00","default","https://sg-dam.imgix.net/services/assets.img/id/87a045c1-0923-4575-81ce-fd9b7c3bfbf6/size/original","91052931-EA","100553115",void 0)
You can use a RegEx to get the Jsons from your string.
I have used this pattern: {(?:[^{}]*{[^{]*})*[^{}]*}
The above regex only handles JSON objects nested one level deep.
Code:
import re
import json
input_data = """window.__NUXT__=funct ... A","100553115",void 0)"""
def json_validate(input_str):
    """Extract every substring of *input_str* that parses as a JSON object.

    Candidates are located with a regular expression that matches a
    brace-delimited span containing at most one nested level of braces.
    Each candidate is then handed to ``json.loads`` and kept only if it
    decodes successfully.

    Args:
        input_str: Arbitrary text that may contain embedded JSON objects.

    Returns:
        A list with the decoded objects, in the order they appear in the text.
        The list is empty when nothing decodes.
    """
    # Compiled once up front; the pattern itself is unchanged.
    pattern = re.compile(r"{(?:[^{}]*{[^{]*})*[^{}]*}")
    valid_jsons = []
    for candidate in pattern.findall(input_str):
        try:
            valid_jsons.append(json.loads(candidate))
        except json.JSONDecodeError:
            # Brace-delimited but not JSON (e.g. a JavaScript object literal
            # with unquoted keys): skip it and keep scanning.
            continue
    return valid_jsons
# Run the extraction on the sample text, print each decoded object,
# then print how many objects were found in total.
getting_jsons = json_validate(input_data)
for one_json in getting_jsons:
    print(one_json)
print(len(getting_jsons))
It can find several (32) valid Jsons in your string:
>>> python3 test.py
{'features': [{'code': 'text-TvMemory', 'label': 'Tekst TV hukommelse', 'value': '1000'}], 'label': 'Tekst TV hukommelse'}
{'features': [{'code': 'tvFeatures', 'label': 'TV funktioner', 'value': 'Netflix'}, {'code': 'tvFeatures', 'label': 'TV funktioner', 'value': 'SmartTV'}, {'code': 'tvFeatures', 'label': 'TV funktioner', 'value': 'Wi-Fi indbygget'}], 'label': 'TV funktioner'}
{'features': [{'code': 'TV.tvApps', 'label': 'TV Apps', 'value': 'Amazon'}, {'code ...
I have found another solution which approaches the issue from totally different way: https://stackoverflow.com/a/54235803/11502612
I have tested the code from the above answer and I got the same output. It means the result is correct (probably).
Would it not be easier to do something like
import json
# json.loads parses JSON text into Python objects; json.dumps does the
# opposite (objects -> text) and would only wrap the string in quotes.
# NOTE: this raises json.JSONDecodeError unless your_string is valid JSON
# from the first character to the last.
data = json.loads(your_string)
Then iterate over it to find the values. Alternatively you can look for the value locations with
find("{")
I don't know if this is what you're looking for, but I thought it might spark an idea or an alternative view.
The link I am trying to scrape is https://www.zomato.com/lucknow/skyhilton-1-alambagh/reviews, specifically the 'Names' and 'Reviews'.
I keep getting a timeout error when requesting the URL; I haven't defined a timeout limit in this case. Is there a way to make my request more manageable, or should I use some other libraries/modules for this purpose?
Error message : ' TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond '
Here is my code:
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq
from urllib.request import Request
url = 'https://www.zomato.com/lucknow/skyhilton-1-alambagh/reviews'
# Attach an explicit User-Agent header to the request.
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
# The context manager closes the connection once the body has been read.
with uReq(req) as uClient:
    page = uClient.read()
page_html = soup(page, "html.parser")
containers = page_html.findAll("div", {"class": "sc-eetwQk hAcPWO"})
# prettify() is a method of the parsed tag itself (the original called a
# misspelled "pretiffy" on the BeautifulSoup class). Guard against an empty
# result so a page without matching divs does not raise IndexError.
if containers:
    print(containers[0].prettify())
else:
    print("No matching review containers found")
import requests
# HTTP headers passed along with each request made by main().
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0',
}

# Base query-string parameters; main() adds the page number to these.
params = dict(
    sort='dd',
    filter='reviews-dd',
    res_id=18439027,
)
def main(url, pages=10):
    """Print the user name and text of every review, one page at a time.

    Args:
        url: Endpoint that returns one page of reviews as JSON.
        pages: Number of pages to request, starting at page 1 (default 10,
            matching the original hard-coded range).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    with requests.Session() as session:
        for page in range(1, pages + 1):
            print(f"{'*' * 30} Extracting Page# {page} {'*' * 30}")
            # Build a per-request copy so the module-level ``params`` dict is
            # not mutated as a side effect of calling this function.
            query = {**params, 'page': page}
            # An explicit timeout keeps an unresponsive host from blocking
            # the script indefinitely.
            response = session.get(
                url, params=query, headers=headers, timeout=30)
            # Fail loudly on 4xx/5xx instead of trying to decode an error page.
            response.raise_for_status()
            reviews = response.json()['entities']['REVIEWS']
            for review in reviews.values():
                print("Username: {:<20}, Comment: {}".format(
                    review['userName'], review['reviewText']))
main("https://www.zomato.com/webroutes/reviews/loadMore")
Output:
****************************** Extracting Page# 1 ******************************
Username: Nitisha Dwivedi , Comment: I tried veg manchurian, veg noodles, pasta arrabiata, virgin mojito from sky hilton and the food was great, freshly cooked, very tasty and well presented. The waiters were attentive and service was overall good but some of the waiters are rude even. I would suggest you to come to this place and enjoy food!
Username: Sakshi Jaiswal , Comment: this place is lovely rocking<br/>the party place <br/>best saturday place<br/>live music great taste<br/><br/>
Username: Atul , Comment: They send me gravy chicken while I ordered tava chicken listed under DRY snacks and beforehand informed Mr. Dinesh Dixit as well about the order.But this is what I received. Gravy can be clearly seen with Oil spilling all over. When I informed ZOMATO and the manager about it, ZOMATO said they will give the feedback to the restaurant (unsatisfactory resolution) and the manager said this is how dry tava chicken looks like. Do not order online as no one will listen to you even if you are right . Pathetic experience from SkyHilton and ZOMATO as well this time.
Username: Sweety Singh , Comment:
Username: Aman Bhardwaj , Comment: the food is awesome you can even visit here with your family the taste of food is ❤️❤️❤️
****************************** Extracting Page# 2 ******************************
Username: Akanksha Singh , Comment:
Username: Harsh Mehrotra , Comment:
Username: Sheetal Kapoor , Comment: I have given them 4 stars because of the service as the restaurant need to really work upon that. <br/>
Username: Mâñvéñdrâ Singh , Comment:
Username: Vishal Yadav , Comment: Good food
****************************** Extracting Page# 3 ******************************
Username: Avni Singh , Comment:
Username: AVNI SINGH , Comment:
Username: Avni Singh , Comment:
Username: Vanshika Shukla , Comment:
Username: Anushka Singh , Comment:
****************************** Extracting Page# 4 ******************************
Username: Anushka Singh , Comment:
Username: Priya Singh , Comment:
Username: Chandramohan Yadav , Comment: Staff and ambience is too good and a healthy and friendly environment
Username: Kavita Vishwakarma , Comment:
Username: Govind Bahadur , Comment: Good food good taste all time to choose this place for ordering and dining very good place and serve very good
****************************** Extracting Page# 5 ******************************
Username: Govind Bahadur , Comment: Such a recommended place to all.<br/>Here food serve good with hygiene way and with a superior good taste.
Username: Anuj Kashyap , Comment: My friend was suggested me to order from here and i verry surprised by there taste and food quality.<br/>Thank you Sky Hilton to serve us verry well.
Username: Anuj Kashyap , Comment: My friend was suggested me to order from here and i verry surprised by there taste and food quality.<br/>Thank you Sky Hilton to serve us verry well.
Username: Akash Choubey , Comment: Good food and very good taste my friend was suggested to order from here and I will totally appreciate the food taste and hygiene packing 😋😋
Username: Akshat Anand , Comment: fine dine restaurant is very excellent and the service person is very kind ; dheer singh who is so amazing and overall experience is very amazing
****************************** Extracting Page# 9 ******************************
Username: Anushree Nigam , Comment: Dheer was very courteous while serving!
Username: Agosh Baranwal , Comment: Amazing staff. Very dedicated and polite.
Username: Sameer Agarwal , Comment:
Username: Vinod Kushwaha , Comment: Awesome food and service by Dheer singh
Username: Manendra Singh , Comment: Nice ambiance music was soo good i love skyhilton exxillent service given by ajeet patel
****************************** Extracting Page# 10 ******************************
Username: Vandana Singh , Comment: awesome service very nice food 🎎🎎🎎🎎🎎🎎🎎🎎<br/>Service dheer singh
Username: Harpreet Singh , Comment: Best service by shubham kanchan
Username: Shruti Mirchandani , Comment: Heard about this restaurant cum bar as one of the trending outlet in Lucknow in Alambagh.<br/>Can visit for good food simply served in very simple way have tried Handi Mutton, Pasta, Paneer Tikka Masala, jeera rice and of course drinks.<br/><br/>Paneer tikka masala is actually good not like any other restaurant who put All that capsicum and onion in paneer tikka masala it's blend of flavor full masala and taste good.<br/><br/>Handi masala was also tasted good.<br/><br/>Though didn't like the ambience at all as it could be done better.<br/><br/>Also can eenjoy Hukka in open sitting area at fifth floor.
Username: Anmol Kacker , Comment: I am writing this review after my fourth visit to this place. Rest all my visits were on a weekday afternoon so, decided to give this place a try on a Saturday night.<br/>The ambiance though a lot changed than before, was good but that's pretty much it. The music was way too loud and deafening. They are currently having a street food festival
so, decided to try a couple of items. Vada pav and omelette were the items ordered. Despite repeatedly telling the person who took the order to give a masala omelette, he got a plain one and that too cold. The crispy corn ordered was awful. Sweet corn soup, was simply cold water with some half boiled vegetables and traces of corn. It had just no taste at all. Tried finishing the items, unfortunately, couldn't even eat half of the meal.<br/>Finally, decided to get up and walk away. Saturday nights can be maddening with overflowing crowd but atleast do something about your food. Quick service stands nowhere if what you are delivering is so bad. Really thought the place would be a bit different this time. Unfortunately, it was as bad as before. Done with this place ! Never again !<br/><br/>Also, I am not big a fan of saturday night parties but all I have to say here is that people were coming in, looking around and walking away. Seeing heavily drunken men dancing is not what a saturday night means to me or I guess to anyone.
Username: Piyush Kumar , Comment: Please increase the salary of mr. Bhanu he is good in f&b Service skills
text
Donald Trump:
791697302519947264,1477604720,Ohio USA,Twitter for iPhone,5251,1895
Join me live in Springfield, Ohio!
Lit
<<<EOT
781619038699094016,1475201875,United States,Twitter for iPhone,31968,17246
While Hillary profits off the rigged system, I am fighting for you! Remember the simple phrase: #FollowTheMoney...
<<<EOT
def read(text):
with open(text,'r') as f:
for line in f:
Is there a way that I can separate the information for each candidate? For example, for Donald Trump it should be:
[
[Donald Trump],
[[791697302519947264,1477604720,'Ohio USA','Twitter for iPhone',5251,1895], ['Join me live in Springfield, Ohio! Lit']],
[[781619038699094016,1475201875,'United States','Twitter for iPhone',31968,17246], ['While Hillary profits off the rigged system, I am fighting for you! Remember the simple phrase: #FollowTheMoney...']]
]
The format of the file is the following:
ID,DATE,LOCATION,SOURCE,FAVORITE_COUNT,RETWEET_COUNT text(the tweet)
So basically, after the 6 header fields, everything that follows is the tweet text, up to the '<<<EOT' marker.
Also is there a way i can do this for every candidate in the file
I'm not sure why you need a multi-dimensional list (I would pick tuples and dictionaries if possible) but this seems to produce the output you asked for:
>>> txt = """Donald Trump:
... 791697302519947264,1477604720,Ohio USA,Twitter for iPhone,5251,1895
... Join me live in Springfield, Ohio!
... Lit
... <<<EOT
... 781619038699094016,1475201875,United States,Twitter for iPhone,31968,17246
... While Hillary profits off the rigged system, I am fighting for you! Remember the simple phrase: #FollowTheMoney...
... <<<EOT
... Another Candidate Name:
... 12312321,123123213,New York USA, Twitter for iPhone,123,123
... This is the tweet text!
... <<<EOT"""
>>>
>>>
>>> buffer = []
>>> tweets = []
>>>
>>> for line in txt.split("\n"):
... if not line.startswith("<<<EOT"):
... buffer.append(line)
... else:
... if buffer[0].strip().endswith(":"):
... tweets.append([buffer.pop(0).rstrip().replace(":", "")])
... metadata = buffer.pop(0).split(",")
... tweet = [" ".join(line for line in buffer).replace("\n", " ")]
... tweets.append([metadata, tweet])
... buffer = []
...
>>>
>>> from pprint import pprint
>>>
>>> pprint(tweets)
[['Donald Trump'],
[['791697302519947264',
'1477604720',
'Ohio USA',
'Twitter for iPhone',
'5251',
'1895'],
['Join me live in Springfield, Ohio! Lit']],
[['781619038699094016',
'1475201875',
'United States',
'Twitter for iPhone',
'31968',
'17246'],
['While Hillary profits off the rigged system, I am fighting for you! Remember the simple phrase: #FollowTheMoney... ']],
['Another Candidate Name'],
[['12312321',
'123123213',
'New York USA',
' Twitter for iPhone',
'123',
'123'],
['This is the tweet text!']]]
>>>
I am not quite understanding... but here is my example to read a file line by line then add that line to a string of text to post to twitter.
# Replace the placeholder with the real path. Backslashes must be doubled
# inside a normal string literal, otherwise the trailing \" escapes the
# closing quote and the string never terminates.
with open("FILEPATH WITH DOUBLE \\") as candidates:  #example "C:\\users\\fox\\desktop\\candidates.txt"
    # Iterate over the file object directly; it is not callable.
    for candidate in candidates:
        candidate = candidate.rstrip('\n')  #removes new line(this is mandatory)
        #next line post means post to twitter
        post("propaganda here " + candidate + "more propaganda")
Note: this code posts to Twitter once for every line in that file,
e.g. 20 lines means twenty Twitter posts.