I have an XML file that looks like this:
<?xml version="1.0" encoding="UTF-8"?>
<Configuration>
<Options>
<SampleRate>1000</SampleRate>
<MaxStateSize>1</MaxStateSize>
<MaxOutputSize>1</MaxOutputSize>
</Options>
<Observer>
<Electrical>
<Filter2 class="MatlabFilter">
<FileName>tesla1.mat</FileName>
</Filter2>
</Electrical>
</Observer>
<CustomDefinitions>
<MyRser class="OhmicResistance">
<Object class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.02597518381655694900, 0.02513715386193249600, 0.02394715132636577100, 0.02325996676357371800, 0.02317075771456176400, 0.02277814077034603900, 0.02267913709322775700, 0.02258569292134297900, 0.02235026503875497600, 0.02222478423822949300, 0.02207606555239715500, 0.02198493491067361700, 0.02188144525929673300, 0.02167985791309091600, 0.02145797158835977700, 0.02137484908165417400, 0.02126561803424023600, 0.02124462299304301700, 0.02123310358079429400, 0.02126287857906075300, 0.02094998489960795500, 0.02073326148328196600, 0.02062489977511897100, 0.02038933084432985300;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</Object>
</MyRser>
<MyZarc1 class = "ZarcElement">
<LookupOhmicResistance class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.00514195955695974370, 0.00529664894839530780, 0.00551630788423380920, 0.00564315318521097210, 0.00548602656854314720, 0.00581013032515952100, 0.00568539803363024480, 0.00559993408980672710, 0.00568126471231252940, 0.00555055893805056660, 0.00553252438800645470, 0.00533018726307717910, 0.00509464327131546690, 0.00493804793774787340, 0.00493109886771354610, 0.00477442697147319100, 0.00502025446307734210, 0.00487842931495043040, 0.00493811443745090660, 0.00515729571528417920, 0.00533726464325906100, 0.00593981588241465630, 0.00624109150199245260, 0.00689603850107461950;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</LookupOhmicResistance>
<LookupTau class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.00063734182589317051, 0.00057789185722226211, 0.00049347290170957214, 0.00044472392739942722, 0.00046367738461651579, 0.00042508654901320510, 0.00044023991357759556, 0.00044357425217429899, 0.00042351365487481847, 0.00042064833809841269, 0.00042058843579174227, 0.00041492786364805096, 0.00040118819045609395, 0.00038561142394963408, 0.00037747031839295342, 0.00038856437977947608, 0.00044028413845948800, 0.00041943961777239272, 0.00048448584895609926, 0.00057788915932770971, 0.00047928926455149246, 0.00041669287910303676, 0.00038539468637880899, 0.00031735513697831374;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</LookupTau>
<LookupPhi class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
1.07974864719212200000, 0.99348199898848211000, 0.87098335853931330000, 0.80024470701232853000, 0.80000000000000171000, 0.80000000000000038000, 0.80000000000000260000, 0.80000691369860855000, 0.80000000000000060000, 0.80000000000000027000, 0.80004367531484710000, 0.80000000000000149000, 0.80000000000000060000, 0.80000000000000249000, 0.80000000000000071000, 0.81779623001734880000, 0.80000000005521332000, 0.81111396956650339000, 0.81011503368816196000, 0.81230494833996914000, 0.80091506729318152000, 0.80000887696454015000, 0.79955578180021947000, 0.79857079231256578000;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</LookupPhi>
</MyZarc1>
<MyZarc2 class = "ZarcElement">
<LookupOhmicResistance class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.03301452546632480100, 0.02957305472348599100, 0.02468616626865488600, 0.02186416025952706400, 0.01289985206743022800, 0.00816660308080110340, 0.00614478067084063380, 0.00525792478601965800, 0.00476592820730910300, 0.00458754759011727100, 0.00446897880675643960, 0.00468454174521149450, 0.00546732172234631280, 0.00563330158181225140, 0.00573737831209459540, 0.00607025493753720390, 0.00599470544834411390, 0.00637374896940332350, 0.00769547582533563430, 0.00930753356619881850, 0.01346675908515722000, 0.01684051438714963600, 0.01852739203814583700, 0.02219451736639845400;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</LookupOhmicResistance>
<LookupTau class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.33635989830427793000, 0.28278374698293790000, 0.20670561210663510000, 0.16277316802313629000, 0.07298525375191647300, 0.03815615743074976500, 0.02451719292084131000, 0.01875641581778507100, 0.01630852098453383600, 0.01444778149059807100, 0.01353083053127584000, 0.01288716696808942500, 0.01409708819428110100, 0.01371252505120631800, 0.01432760299471728600, 0.01498271848043122500, 0.01622563803600479300, 0.01719588374486775300, 0.02298417293562926200, 0.03243196651297039500, 0.05525426980384628300, 0.09905004722829763300, 0.12094793594052324000, 0.16855204183666592000;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</LookupTau>
<LookupPhi class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.70799773770539609000, 0.72250839021147018000, 0.74311351677009541000, 0.75501225182507614000, 0.74177842881577971000, 0.80122091069407952000, 0.86258087865252020000, 0.89734325213456811000, 0.91923534463767520000, 0.91834024872224662000, 0.93526886842710921000, 0.89875057203436626000, 0.82894875436479731000, 0.82264747106578762000, 0.81996236853572801000, 0.80358872652465718000, 0.81906893212010168000, 0.83493987333754394000, 0.79039503004349965000, 0.80763292126126807000, 0.80002267784885428000, 0.87508932730353728000, 0.91262265203087867000, 0.99421683622075130000;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</LookupPhi>
</MyZarc2>
<MyWarburgCotanh1 class = "WarburgCotanh" RCCounter = "10" WithCapacity = "False">
<Sigma class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.00019493534185688539, 0.00019493534185688539, 0.00019493534185688539, 0.00067426749346130913, 0.00106494971394849010, 0.00108151588850858910, 0.00099364136002325814, 0.00077950646290164086, 0.00058746266333470842, 0.00047915035146729013, 0.00041684830055908038, 0.00038907155296702889, 0.00085773887244337571, 0.00059930121703808173, 0.00009999999999863060, 0.00122801503737189690, 0.00061160314761842048, 0.00108680429208242790, 0.00090474553083427062, 0.00010000001120442429, 0.00085797449185355860, 0.00158984738214035730, 0.00195578382728375540, 0.00275129783846505670;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</Sigma>
<Clim class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
184114.09294586902000000000, 184114.09294586902000000000, 184114.09294586902000000000, 296640.35877942020000000000, 155563.70463566060000000000, 161741.09030818491000000000, 274329.47505753755000000000, 365937.43100107106000000000, 471875.56453434512000000000, 574523.49228195997000000000, 500829.98327329860000000000, 500831.93837176822000000000, 516912.44569578546000000000, 14666.70203326588300000000, 500829.87961207400000000000, 171301.60968597961000000000, 15001.83580496977800000000, 11385.27633061551200000000, 39670.03821881797200000000, 15241.49130727929200000000, 45814.20857787308300000000, 6875.05990426292740000000, 6875.05990426292740000000, 6875.05990426292740000000;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</Clim>
</MyWarburgCotanh1>
<MyWarburgCotanh2 class = "WarburgCotanh" RCCounter = "10" WithCapacity = "False">
<Sigma class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.00836492574413456700, 0.00613129418512617630, 0.00295953737133426190, 0.00112795949294738170, 0.00037522002409440096, 0.00037581037146368275, 0.00068733685061351352, 0.00077015527398605326, 0.00080364831088865424, 0.00080077590753162026, 0.00082462291914824569, 0.00105712605279276560, 0.00100000000000000000, 0.00154529106500599240, 0.00145914207868891010, 0.00016323885946597442, 0.00101833795867687770, 0.00074589210831628103, 0.00010000004570686281, 0.00081464142917979561, 0.00010000043978881727, 0.00134063620710405860, 0.00196095409076167720, 0.00330947122914780620;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</Sigma>
<Clim class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
23468.99221334158200000000, 18757.39143540290400000000, 12066.91833072997900000000, 8203.40569282026260000000, 9766.85232859530520000000, 13716.41747447804300000000, 20490.20448531251200000000, 23705.69491095333100000000, 22494.38807232537900000000, 19915.08394238330600000000, 16096.56074658045700000000, 10353.87296285772600000000, 8796.65707198722160000000, 239019.99718881631000000000, 12242.34469069729300000000, 18104.95029301897000000000, 213446.54566039098000000000, 239019.99118228391000000000, 12203.13852505865500000000, 44374.37961494550100000000, 12440.67207137969900000000, 60795.57175896296400000000, 84973.02160275452500000000, 137532.69517621450000000000;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</Clim>
</MyWarburgCotanh2>
<MyRC2 class="ParallelRC">
<LookupOhmicResistance class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.00366188831554084640, 0.00562151834301598440, 0.00840419298203068030, 0.01001108960456029300, 0.00967486401762034100, 0.00939968391674490640, 0.00872919497962075870, 0.00822783378103364090, 0.00813506346266940770, 0.00779007715917680400, 0.00679257413068258020, 0.00606271929870643490, 0.00841078045910312150, 0.00791003775904466560, 0.01002849549145711600, 0.00997501477985486030, 0.00855965252090381410, 0.00609591267939118930, 0.00982352625138059080, 0.00964750941170980020, 0.00739790720395582920, 0.00115957740601912650, 0.00115957740601912650, 0.00115957740601912650;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</LookupOhmicResistance>
<LookupTau class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
0.65173297869134739000, 0.65173297869134739000, 0.65173297869134739000, 32.59419722896010500000, 34.72865871720645000000, 34.47066564948234200000, 36.97952431512094100000, 36.93220799476522100000, 38.35431610344169000000, 34.09556741639196500000, 27.50565903985229400000, 26.75681820890759600000, 34.49371402438613600000, 43.24257987359646900000, 51.09778145546822300000, 45.00971082844134200000, 38.28699846139706900000, 31.07413008266951600000, 31.12255552828149900000, 28.80453236406994900000, 20.78359704561795200000, 0.10000010082754476000, 0.10000010082754476000, 0.10000010082754476000;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</LookupTau>
</MyRC2>
<MyOCV class = "VoltageSource">
<Object class="LookupObj2dWithState">
<RowState cacheref="Soc"/>
<ColState cacheref="ThermalState"/>
<LookupData>
1.90870643517178310000, 2.50000000000000000000, 3.33963686205606790000, 3.37337661754281680000, 3.43586504041013500000, 3.49016807881707790000, 3.53528751514386390000, 3.56403618253534700000, 3.58839269241070680000, 3.61254955876211210000, 3.64030000853381400000, 3.67403976401328690000, 3.72634636718430600000, 3.78643906777870100000, 3.82976171238624420000, 3.86849255596098420000, 3.90602553838471110000, 3.94815032185942980000, 3.99985799445857990000, 4.05436067638947860000, 4.09948011271626460000, 4.15677780397527390000, 4.20000000000000020000, 4.29396129570592680000;
</LookupData>
<MeasurementPointsRow desc="StateOfCharge">
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
</MeasurementPointsRow>
<MeasurementPointsColumn desc="ThermalState">
25
</MeasurementPointsColumn>
</Object>
</MyOCV>
<MyCellElement1 class="CellElement" observable="True">
<ThermalState class= "ThermalState" cache= "True">
<InitialTemperature>25</InitialTemperature>
</ThermalState>
<Soc class="Soc" cache="True">
<MaxCapacity>3.0371</MaxCapacity>
<InitialSoc>88.3</InitialSoc>
<Measurementpoints>0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100</Measurementpoints>
</Soc>
<Children>
<OhmicResistance ref="MyRser"/>
<ParallelRC ref="MyRC2"/>
<ZarcElements ref="MyZarc1"/>
<ZarcElements ref="MyZarc2"/>
<WarburgCotanh ref="MyWarburgCotanh1"/>
<WarburgCotanh ref="MyWarburgCotanh2"/>
<VoltageSource ref="MyOCV"/>
</Children>
</MyCellElement1>
</CustomDefinitions>
<RootElement class="SerialTwoPort">
<Children count="1">
<Pack ref="MyCellElement1"/>
</Children>
</RootElement>
</Configuration>
I want to fetch the lookup data for the SOC and the resistance from the MyRser element (class "OhmicResistance") under root.child('CustomDefinitions'). How can I efficiently extract the two data sets and store them in arrays using Python? I have no experience with XML files.
If I understand you correctly, you need something along these lines:
from lxml import etree

# Placeholder: paste the XML document from the question here.
data = """[your xml above]"""

# Parse from bytes: lxml rejects str input that carries an encoding declaration.
doc = etree.XML(data.encode())

# Attribute tests in XPath use '@'. Both queries are restricted to the
# MyRser element with class "OhmicResistance" under CustomDefinitions.
soc = doc.xpath('//CustomDefinitions//MyRser[@class="OhmicResistance"]//MeasurementPointsRow[@desc="StateOfCharge"]/text()')
lud = doc.xpath('//CustomDefinitions//MyRser[@class="OhmicResistance"]//LookupData/text()')

# Each query returns a list with one text node; strip the surrounding newlines.
print(soc[0].strip())
print(lud[0].strip())

# The same data as lists of floats. The LookupData text ends with a ';'
# row terminator, which has to be removed before splitting on commas.
soc_values = [float(v) for v in soc[0].strip().split(',')]
lud_values = [float(v) for v in lud[0].strip().rstrip(';').split(',')]
Output:
-5, 0, 7.100000e+00, 1.120000e+01, 16, 2.080000e+01, 2.560000e+01, 3.040000e+01, 3.520000e+01, 4.010000e+01, 4.490000e+01, 4.970000e+01, 5.450000e+01, 5.930000e+01, 6.420000e+01, 69, 7.380000e+01, 7.860000e+01, 8.350000e+01, 8.830000e+01, 9.310000e+01, 9.770000e+01, 100, 105
0.02597518381655694900, 0.02513715386193249600, 0.02394715132636577100, 0.02325996676357371800, 0.02317075771456176400, 0.02277814077034603900, 0.02267913709322775700, 0.02258569292134297900, 0.02235026503875497600, 0.02222478423822949300, 0.02207606555239715500, 0.02198493491067361700, 0.02188144525929673300, 0.02167985791309091600, 0.02145797158835977700, 0.02137484908165417400, 0.02126561803424023600, 0.02124462299304301700, 0.02123310358079429400, 0.02126287857906075300, 0.02094998489960795500, 0.02073326148328196600, 0.02062489977511897100, 0.02038933084432985300;
I am trying to scrape Myntra, but I keep getting errors. I have made many changes to the code and tried the requests package as well as urllib, but I still get errors.
Sometimes I get a timeout error, and sometimes a urllib.error.URLError:
urllib.error.URLError: <urlopen error Tunnel connection failed: 502 Proxy Error (no funds available)>
Here is my code.
import os, ssl, http, gzip
import urllib.request
from bs4 import BeautifulSoup
import re
from http.cookiejar import CookieJar
import json
import http
import requests
def myntraScraper(url):
    """Fetch `url` through the configured proxy and print the parsed HTML.

    Installs a process-wide urllib opener that routes requests through the
    proxy and keeps cookies in a CookieJar, requests the page with a
    browser-like User-Agent, decompresses the body if it was gzip-encoded,
    parses it with BeautifulSoup (lxml parser) and prints the result.

    Args:
        url: Address of the page to download.

    Raises:
        urllib.error.URLError: If the proxy or the target cannot be reached
            (including the 30 second timeout expiring).
    """
    # SECURITY NOTE(review): when PYTHONHTTPSVERIFY is unset this disables TLS
    # certificate verification for every HTTPS request made via urllib in this
    # process. Kept for compatibility; remove it if the proxy does not need it.
    if (not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None)):
        ssl._create_default_https_context = ssl._create_unverified_context
    cj = CookieJar()
    proxy = {
        'https': '------',
        'http': '-------'
    }
    # user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
    # install_opener affects every later urllib.request.urlopen call.
    urllib.request.install_opener(
        urllib.request.build_opener(
            urllib.request.ProxyHandler(proxy),
            urllib.request.HTTPCookieProcessor(cj)
        )
    )
    request = urllib.request.Request(url, headers={
        'accept-encoding': 'gzip',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
    })
    # The context manager closes the connection even if reading fails; the
    # timeout keeps a dead proxy from blocking the call indefinitely.
    with urllib.request.urlopen(request, timeout=30) as page:
        body = page.read()
        content_encoding = page.headers.get('Content-Encoding', '')
    # The server (or the proxy) may ignore 'accept-encoding' and answer with an
    # uncompressed body, so only decompress when the response says it is gzip.
    if 'gzip' in content_encoding.lower():
        body = gzip.decompress(body)
    html = body.decode('utf-8')
    soup = BeautifulSoup(html, 'lxml')
    print(soup)
myntraScraper("https://www.myntra.com/sports-shoes/puma/puma-men-blue-hybrid-fuego-running-shoes/11203218/buy")
Currently, I am using Smartproxy. But I tried the same thing with PacketStream and Luminati. Most of the time I got the proxy error.
Myntra stores all the product data in a JavaScript variable called pdpData inside a script tag.
The below script gets the whole json that contains all the data regarding the product.
import requests, json
from bs4 import BeautifulSoup
# Browser-like User-Agent sent with the request.
headers = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'}
s = requests.Session()
# NOTE(review): verify=False disables TLS certificate verification; kept as in
# the original, but remove it unless the environment really requires it.
res = s.get("https://www.myntra.com/sports-shoes/puma/puma-men-blue-hybrid-fuego-running-shoes/11203218/buy", headers=headers, verify=False)
soup = BeautifulSoup(res.text, "lxml")

# Pick the first <script> element whose text mentions pdpData. The loop
# variable is deliberately not called ``s`` so the session above is not
# overwritten.
script = None
for tag in soup.find_all("script"):
    if 'pdpData' in tag.text:
        script = tag.get_text(strip=True)
        break

# Fail with a clear message instead of a TypeError on ``None`` when the page
# did not contain the expected script (e.g. a block page was returned).
if script is None:
    raise ValueError("no <script> element containing 'pdpData' was found")

# raw_decode parses the JSON object that starts at the first '{' and ignores
# anything that follows it (such as a trailing ';').
data, _ = json.JSONDecoder().raw_decode(script[script.index('{'):])
print(data)
Output:
{'pdpData': {'id': 11203218, 'name': 'Puma Men Blue Hybrid Fuego Running Shoes', 'mrp': 6499, 'manufacturer': 'SSIPL RETAIL LIMITED, KUNDLI,75, SERSA ROAD, 131028 SONEPAT', 'countryOfOrigin': 'India', 'colours': None, 'baseColour': 'Blue', 'brand': {'uidx': '', 'name': 'Puma', 'image': '', 'bio': ''}, 'media': {'videos': [], 'albums': [{'name': 'default', 'images': [{'src': 'http://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/0c15e03c-863b-4a4a-9bb7-709a733fd4821576816965952-1.jpg', 'secureSrc': 'https://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/0c15e03c-863b-4a4a-9bb7-709a733fd4821576816965952-1.jpg', 'host': None, 'imageURL': 'http://assets.myntassets.com/assets/images/productimage/2019/12/20/0c15e03c-863b-4a4a-9bb7-709a733fd4821576816965952-1.jpg', 'annotation': []}, {'src': 'http://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/69bfa4e0-1ac4-4adf-b84e-4815ff60e8831576816966007-2.jpg', 'secureSrc': 'https://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/69bfa4e0-1ac4-4adf-b84e-4815ff60e8831576816966007-2.jpg', 'host': None, 'imageURL': 'http://assets.myntassets.com/assets/images/productimage/2019/12/20/69bfa4e0-1ac4-4adf-b84e-4815ff60e8831576816966007-2.jpg', 'annotation': []}, {'src': 'http://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/d2fd0ca0-1643-43ae-a0fc-fb1309580e151576816966049-3.jpg', 'secureSrc': 'https://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/d2fd0ca0-1643-43ae-a0fc-fb1309580e151576816966049-3.jpg', 'host': None, 'imageURL': 'http://assets.myntassets.com/assets/images/productimage/2019/12/20/d2fd0ca0-1643-43ae-a0fc-fb1309580e151576816966049-3.jpg', 'annotation': []}, 
{'src': 'http://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/0edae428-b9c0-4755-9127-0961d872b78a1576816966095-4.jpg', 'secureSrc': 'https://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/0edae428-b9c0-4755-9127-0961d872b78a1576816966095-4.jpg', 'host': None, 'imageURL': 'http://assets.myntassets.com/assets/images/productimage/2019/12/20/0edae428-b9c0-4755-9127-0961d872b78a1576816966095-4.jpg', 'annotation': []}, {'src': 'http://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/c59c7677-2bbd-4dbe-9b02-7c321c29cb701576816966142-5.jpg', 'secureSrc': 'https://assets.myntassets.com/h_($height),q_($qualityPercentage),w_($width)/v1/assets/images/productimage/2019/12/20/c59c7677-2bbd-4dbe-9b02-7c321c29cb701576816966142-5.jpg', 'host': None, 'imageURL': 'http://assets.myntassets.com/assets/images/productimage/2019/12/20/c59c7677-2bbd-4dbe-9b02-7c321c29cb701576816966142-5.jpg', 'annotation': []}]}, {'name': 'animatedImage', 'images': []}]}, 'sbpEnabled': False, 'sizechart': {'sizeChartUrl': None, 'sizeRepresentationUrl': 'http://assets.myntassets.com/assets/images/sizechart/2016/12/12/11481538267795-footwear.png'}, 'sizeRecoLazy': {'actionType': 'lazy', 'action': '/product/11203218/size/recommendation', 'sizeProfileAction': '/user/size-profiles?gender=male&articleType=Sports%20Shoes'}, 'analytics': {'articleType': 'Sports Shoes', 'subCategory': 'Shoes', 'masterCategory': 'Footwear', 'gender': 'Men', 'brand': 'Puma', 'colourHexCode': None}, 'crossLinks': [{'title': 'More Sports Shoes by Puma', 'url': 'sports-shoes?f=Brand:Puma::Gender:men'}, {'title': 'More Blue Sports Shoes', 'url': 'sports-shoes?f=Color:Blue_0074D9::Gender:men'}, {'title': 'More Sports Shoes', 'url': 'sports-shoes?f=Gender:men'}], 'relatedStyles': None, 'disclaimerTitle': '', 'productDetails': [{'type': None, 'content': 
None, 'title': 'Product Details', 'description': "<b>FEATURES + BENEFITS</b><br>HYBRID: PUMA's combination of two of its best technologies: IGNITE foam and NRGY beads<br>IGNITE: PUMA's foam midsole and branded heel cage supports and stabilises by locking the heel onto the platform<br>NRGY: PUMA's foam midsole offers superior cushion from heel to toe so you can power through your run<br>Heel-to-toe drop: 12mm<br><br><b>Product Design Details</b><ul><li>A pair of blue & brown running sports shoes, has regular styling, lace-up detail</li><li>Low boot silhouette</li><li>Lightweight synthetic upper</li><li>Overlays to secure the heel</li><li>Classic tongue</li><li>Lace-up closure</li><li>Rubber outsole for traction and durability</li><li>PUMA Wordmark at the tongue</li><li>PUMA Cat Logo at heel</li><li>Warranty: 3 months</li><li>Warranty provided by brand/manufacturer</li></ul><br><b>PRODUCT STORY</b><br>Change the name of the game with the HYBRID Fuego running sneakers. This bold colour-blocked shoe pairs a HYBRID foam midsole and a grippy rubber outsole for the ultimate in comfort and stability while still maintaining a stylish edge."}, {'type': None, 'content': None, 'title': 'MATERIAL & CARE', 'description': 'Textile<br>Wipe with a clean, dry cloth to remove dust'}], 'preOrder': None, 'sizeChartDisclaimerText': '', 'tags': None, 'articleAttributes': {'Ankle Height': 'Regular', 'Arch Type': 'Medium', 'Cleats': 'No Cleats', 'Cushioning': 'Medium', 'Distance': 'Medium', 'Fastening': 'Lace-Ups', 'Material': 'Textile', 'Outsole Type': 'Marking', 'Pronation for Running Shoes': 'Neutral', 'Running Type': 'Road Running', 'Sole Material': 'Rubber', 'Sport': 'Running', 'Surface Type': 'Outdoor', 'Technology': 'NA', 'Warranty': '3 months'}, 'systemAttributes': [], 'ratings': None, 'urgency': [{'value': '0', 'type': 'PURCHASED', 'ptile': 0}, {'value': '0', 'type': 'CART', 'ptile': 0}, {'value': '0', 'type': 'WISHLIST', 'ptile': 0}, {'value': '0', 'type': 'PDP', 'ptile': 0}], 
'catalogAttributes': {'catalogDate': '1576751286000', 'season': 'summer', 'year': '2020'}, 'productContentGroupEntries': [{'title': '', 'type': 'DETAILS', 'attributes': [{'attributeName': 'Product Details', 'attributeType': 'STRING', 'value': "<b>FEATURES + BENEFITS</b><br>HYBRID: PUMA's combination of two of its best technologies: IGNITE foam and NRGY beads<br>IGNITE: PUMA's foam midsole and branded heel cage supports and stabilises by locking the heel onto the platform<br>NRGY: PUMA's foam midsole offers superior cushion from heel to toe so you can power through your run<br>Heel-to-toe drop: 12mm<br><br><b>Product Design Details</b><ul><li>A pair of blue & brown running sports shoes, has regular styling, lace-up detail</li><li>Low boot silhouette</li><li>Lightweight synthetic upper</li><li>Overlays to secure the heel</li><li>Classic tongue</li><li>Lace-up closure</li><li>Rubber outsole for traction and durability</li><li>PUMA Wordmark at the tongue</li><li>PUMA Cat Logo at heel</li><li>Warranty: 3 months</li><li>Warranty provided by brand/manufacturer</li></ul><br><b>PRODUCT STORY</b><br>Change the name of the game with the HYBRID Fuego running sneakers. This bold colour-blocked shoe pairs a HYBRID foam midsole and a grippy rubber outsole for the ultimate in comfort and stability while still maintaining a stylish edge."}, {'attributeName': 'Material & Care', 'attributeType': 'STRING', 'value': 'Textile<br>Wipe with a clean, dry cloth to remove dust'}, {'attributeName': 'Style Note', 'attributeType': 'STRING', 'value': "You'll look and feel super stylish in these trendsetting sports shoes by Puma. 
Match this blue pair with track pants and a sleeveless sports T-shirt when heading out for a casual day with friends."}]}], 'shoppableLooks': None, 'descriptors': [{'title': 'description', 'description': "<b>FEATURES + BENEFITS</b><br>HYBRID: PUMA's combination of two of its best technologies: IGNITE foam and NRGY beads<br>IGNITE: PUMA's foam midsole and branded heel cage supports and stabilises by locking the heel onto the platform<br>NRGY: PUMA's foam midsole offers superior cushion from heel to toe so you can power through your run<br>Heel-to-toe drop: 12mm<br><br><b>Product Design Details</b><ul><li>A pair of blue & brown running sports shoes, has regular styling, lace-up detail</li><li>Low boot silhouette</li><li>Lightweight synthetic upper</li><li>Overlays to secure the heel</li><li>Classic tongue</li><li>Lace-up closure</li><li>Rubber outsole for traction and durability</li><li>PUMA Wordmark at the tongue</li><li>PUMA Cat Logo at heel</li><li>Warranty: 3 months</li><li>Warranty provided by brand/manufacturer</li></ul><br><b>PRODUCT STORY</b><br>Change the name of the game with the HYBRID Fuego running sneakers. This bold colour-blocked shoe pairs a HYBRID foam midsole and a grippy rubber outsole for the ultimate in comfort and stability while still maintaining a stylish edge."}, {'title': 'style_note', 'description': "You'll look and feel super stylish in these trendsetting sports shoes by Puma. 
Match this blue pair with track pants and a sleeveless sports T-shirt when heading out for a casual day with friends."}, {'title': 'materials_care_desc', 'description': 'Textile<br>Wipe with a clean, dry cloth to remove dust'}], 'flags': {'isExchangeable': True, 'isReturnable': True, 'openBoxPickupEnabled': True, 'tryAndBuyEnabled': True, 'isLarge': False, 'isHazmat': False, 'isFragile': False, 'isJewellery': False, 'outOfStock': False, 'codEnabled': True, 'globalStore': False, 'loyaltyPointsEnabled': False, 'emiEnabled': True, 'chatEnabled': False, 'measurementModeEnabled': False, 'sampleModeEnabled': False, 'disableBuyButton': False}, 'earlyBirdOffer': None, 'serviceability': {'launchDate': '', 'returnPeriod': 30, 'descriptors': ['Pay on delivery might be available', 'Easy 30 days returns and exchanges', 'Try & Buy might be available'], 'procurementTimeInDays': {'6206': 4}}, 'buyButtonSellerOrder': [{'skuId': 38724440, 'sellerPartnerId': 6206}, {'skuId': 38724442, 'sellerPartnerId': 6206}, {'skuId': 38724446, 'sellerPartnerId': 6206}, {'skuId': 38724450, 'sellerPartnerId': 6206}, {'skuId': 38724452, 'sellerPartnerId': 6206}, {'skuId': 38724444, 'sellerPartnerId': 6206}, {'skuId': 38724448, 'sellerPartnerId': 6206}], 'sellers': [{'sellerPartnerId': 6206, 'sellerName': 'Puma Sports India Pvt. 
Ltd.(NSCM)'}], 'sizes': [{'skuId': 38724440, 'styleId': 11203218, 'action': '/product/11203218/related/6?co=1', 'label': '6', 'available': True, 'sizeType': 'UK Size', 'originalStyle': True, 'measurements': [{'type': 'Body Measurement', 'name': 'To Fit Foot Length', 'value': '24.5', 'minValue': '24.5', 'maxValue': '24.5', 'unit': 'cm', 'displayText': '24.5cm'}], 'allSizesList': [{'scaleCode': 'uk_size', 'sizeValue': '6', 'size': 'UK Size', 'order': 1, 'prefix': 'UK'}, {'scaleCode': 'us_size', 'sizeValue': '7', 'size': 'US Size', 'order': 2, 'prefix': 'US'}, {'scaleCode': 'euro_size', 'sizeValue': '39', 'size': 'Euro Size', 'order': 3, 'prefix': 'EURO'}], 'sizeSellerData': [{'mrp': 6499, 'sellerPartnerId': 6206, 'availableCount': 32, 'sellableInventoryCount': 32, 'warehouses': ['106', '328'], 'supplyType': 'ON_HAND', 'discountId': '11203218:23363948', 'discountedPrice': 2924}]}, {'skuId': 38724442, 'styleId': 11203218, 'action': '/product/11203218/related/7?co=1', 'label': '7', 'available': True, 'sizeType': 'UK Size', 'originalStyle': True, 'measurements': [{'type': 'Body Measurement', 'name': 'To Fit Foot Length', 'value': '25.4', 'minValue': '25.4', 'maxValue': '25.4', 'unit': 'cm', 'displayText': '25.4cm'}], 'allSizesList': [{'scaleCode': 'uk_size', 'sizeValue': '7', 'size': 'UK Size', 'order': 1, 'prefix': 'UK'}, {'scaleCode': 'us_size', 'sizeValue': '8', 'size': 'US Size', 'order': 2, 'prefix': 'US'}, {'scaleCode': 'euro_size', 'sizeValue': '40.5', 'size': 'Euro Size', 'order': 3, 'prefix': 'EURO'}], 'sizeSellerData': [{'mrp': 6499, 'sellerPartnerId': 6206, 'availableCount': 86, 'sellableInventoryCount': 86, 'warehouses': ['106'], 'supplyType': 'ON_HAND', 'discountId': '11203218:23363948', 'discountedPrice': 2924}]}, {'skuId': 38724444, 'styleId': 11203218, 'action': '/product/11203218/related/8?co=1', 'label': '8', 'available': True, 'sizeType': 'UK Size', 'originalStyle': True, 'measurements': [{'type': 'Body Measurement', 'name': 'To Fit Foot Length', 
'value': '26.2', 'minValue': '26.2', 'maxValue': '26.2', 'unit': 'cm', 'displayText': '26.2cm'}], 'allSizesList': [{'scaleCode': 'uk_size', 'sizeValue': '8', 'size': 'UK Size', 'order': 1, 'prefix': 'UK'}, {'scaleCode': 'us_size', 'sizeValue': '9', 'size': 'US Size', 'order': 2, 'prefix': 'US'}, {'scaleCode': 'euro_size', 'sizeValue': '42', 'size': 'Euro Size', 'order': 3, 'prefix': 'EURO'}], 'sizeSellerData': [{'mrp': 6499, 'sellerPartnerId': 6206, 'availableCount': 188, 'sellableInventoryCount': 188, 'warehouses': ['106'], 'supplyType': 'ON_HAND', 'discountId': '11203218:23363948', 'discountedPrice': 2924}]}, {'skuId': 38724446, 'styleId': 11203218, 'action': '/product/11203218/related/9?co=1', 'label': '9', 'available': True, 'sizeType': 'UK Size', 'originalStyle': True, 'measurements': [{'type': 'Body Measurement', 'name': 'To Fit Foot Length', 'value': '27.1', 'minValue': '27.1', 'maxValue': '27.1', 'unit': 'cm', 'displayText': '27.1cm'}], 'allSizesList': [{'scaleCode': 'uk_size', 'sizeValue': '9', 'size': 'UK Size', 'order': 1, 'prefix': 'UK'}, {'scaleCode': 'us_size', 'sizeValue': '10', 'size': 'US Size', 'order': 2, 'prefix': 'US'}, {'scaleCode': 'euro_size', 'sizeValue': '43', 'size': 'Euro Size', 'order': 3, 'prefix': 'EURO'}], 'sizeSellerData': [{'mrp': 6499, 'sellerPartnerId': 6206, 'availableCount': 163, 'sellableInventoryCount': 163, 'warehouses': ['106'], 'supplyType': 'ON_HAND', 'discountId': '11203218:23363948', 'discountedPrice': 2924}]}, {'skuId': 38724448, 'styleId': 11203218, 'action': '/product/11203218/related/10?co=1', 'label': '10', 'available': True, 'sizeType': 'UK Size', 'originalStyle': True, 'measurements': [{'type': 'Body Measurement', 'name': 'To Fit Foot Length', 'value': '27.9', 'minValue': '27.9', 'maxValue': '27.9', 'unit': 'cm', 'displayText': '27.9cm'}], 'allSizesList': [{'scaleCode': 'uk_size', 'sizeValue': '10', 'size': 'UK Size', 'order': 1, 'prefix': 'UK'}, {'scaleCode': 'us_size', 'sizeValue': '11', 'size': 'US Size', 
'order': 2, 'prefix': 'US'}, {'scaleCode': 'euro_size', 'sizeValue': '44.5', 'size': 'Euro Size', 'order': 3, 'prefix': 'EURO'}], 'sizeSellerData': [{'mrp': 6499, 'sellerPartnerId': 6206, 'availableCount': 153, 'sellableInventoryCount': 153, 'warehouses': ['106'], 'supplyType': 'ON_HAND', 'discountId': '11203218:23363948', 'discountedPrice': 2924}]}, {'skuId': 38724450, 'styleId': 11203218, 'action': '/product/11203218/related/11?co=1', 'label': '11', 'available': True, 'sizeType': 'UK Size', 'originalStyle': True, 'measurements': [{'type': 'Body Measurement', 'name': 'To Fit Foot Length', 'value': '28.8', 'minValue': '28.8', 'maxValue': '28.8', 'unit': 'cm', 'displayText': '28.8cm'}], 'allSizesList': [{'scaleCode': 'uk_size', 'sizeValue': '11', 'size': 'UK Size', 'order': 1, 'prefix': 'UK'}, {'scaleCode': 'us_size', 'sizeValue': '12', 'size': 'US Size', 'order': 2, 'prefix': 'US'}, {'scaleCode': 'euro_size', 'sizeValue': '46', 'size': 'Euro Size', 'order': 3, 'prefix': 'EURO'}], 'sizeSellerData': [{'mrp': 6499, 'sellerPartnerId': 6206, 'availableCount': 43, 'sellableInventoryCount': 43, 'warehouses': ['106'], 'supplyType': 'ON_HAND', 'discountId': '11203218:23363948', 'discountedPrice': 2924}]}, {'skuId': 38724452, 'styleId': 11203218, 'action': '/product/11203218/related/12?co=1', 'label': '12', 'available': False, 'sizeType': 'UK Size', 'originalStyle': True, 'measurements': [{'type': 'Body Measurement', 'name': 'To Fit Foot Length', 'value': '29.6', 'minValue': '29.6', 'maxValue': '29.6', 'unit': 'cm', 'displayText': '29.6cm'}], 'allSizesList': [{'scaleCode': 'uk_size', 'sizeValue': '12', 'size': 'UK Size', 'order': 1, 'prefix': 'UK'}, {'scaleCode': 'us_size', 'sizeValue': '13', 'size': 'US Size', 'order': 2, 'prefix': 'US'}, {'scaleCode': 'euro_size', 'sizeValue': '47', 'size': 'Euro Size', 'order': 3, 'prefix': 'EURO'}], 'sizeSellerData': []}], 'discounts': [{'type': 1, 'freeItem': False, 'label': '(55% OFF)', 'discountText': '', 'timerStart': '0', 
'timerEnd': '1597084200', 'discountPercent': 55, 'offer': '', 'discountId': '11203218:23363948', 'heading': None, 'description': None, 'link': None, 'freeItemImage': None}], 'offers': [{'type': 'EMI', 'title': 'EMI option available', 'description': '', 'action': '/faqs', 'image': None}], 'bundledSkus': None, 'richPdp': None, 'landingPageUrl': 'sports-shoes/puma/puma-men-blue-hybrid-fuego-running-shoes/11203218/buy'}, 'pageName': 'Pdp', 'atsa': ['Sport', 'Material', 'Fastening', 'Ankle Height', 'Outsole Type', 'Cleats', 'Pronation for Running Shoes', 'Arch Type', 'Cushioning', 'Running Type', 'Warranty', 'Distance', 'Number of Components', 'Surface Type', 'Technology']}