Click here to Skip to main content
15,888,113 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
I am new to Python and XML, and I need help.

I know how to solve the problem in principle, but I find it difficult to translate into code.

My file is named "Opta24".

Its structure is:

<Games timestamp="2012-09-11T10:20:32">
  <Game id="360481" away_team_id="43" away_team_name="Manchester City" competition_id="8" competition_name="English Barclays Premier League" game_date="2011-08-21T16:00:00" home_team_id="30" home_team_name="Bolton Wanderers" matchday="2" period_1_start="2011-08-21T16:00:38" period_2_start="2011-08-21T17:03:47" season_id="2011" season_name="Season 2011/2012">
    <Event id="301038339" event_id="1" type_id="34" period_id="16" min="0" sec="0" team_id="43" outcome="1" x="0.0" y="0.0" timestamp="2011-08-21T15:23:06.696" last_modified="2011-08-21T15:54:56">
      <Q id="2028397186" qualifier_id="130" value="4" />
      <Q id="1518776786" qualifier_id="227" value="0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="997025056" qualifier_id="59" value="25, 2, 13, 18, 4, 6, 42, 7, 10, 16, 21, 5, 11, 15, 20, 22, 32, 45" />
      <Q id="955425655" qualifier_id="194" value="17476" />
      <Q id="996147927" qualifier_id="131" value="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="1940069841" qualifier_id="44" value="1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5" />
      <Q id="1529687618" qualifier_id="30" value="15749, 20492, 42593, 1632, 17476, 7551, 14664, 15157, 42544, 37572, 20664, 20658, 19959, 65807, 56827, 17336, 20312, 42493" />
    </Event>
    <Event id="1475524684" event_id="1" type_id="34" period_id="16" min="0" sec="0" team_id="30" outcome="1" x="0.0" y="0.0" timestamp="2011-08-21T15:39:39.166" last_modified="2011-08-21T16:06:40">
      <Q id="1993329296" qualifier_id="59" value="22, 2, 4, 6, 5, 12, 7, 19, 17, 14, 10, 1, 3, 16, 20, 21, 31, 38" />
      <Q id="783602879" qualifier_id="131" value="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="1981808255" qualifier_id="30" value="1344, 28183, 2004, 27696, 19419, 1587, 18428, 14668, 9765, 3630, 10089, 45175, 82263, 19930, 1615, 15188, 19958, 105088" />
      <Q id="1521261840" qualifier_id="194" value="3630" />
      <Q id="459356083" qualifier_id="227" value="0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="2003349974" qualifier_id="130" value="2" />
      <Q id="1582676412" qualifier_id="44" value="1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5" />
    </Event>
    <Event id="2036897618" event_id="2" type_id="32" period_id="1" min="0" sec="0" team_id="30" outcome="1" x="0.0" y="0.0" timestamp="2011-08-21T16:00:38.967" last_modified="2011-08-21T16:00:39">
      <Q id="530297025" qualifier_id="127" value="Left to Right" />
    </Event>
    <Event id="336246484" event_id="2" type_id="32" period_id="1" min="0" sec="0" team_id="43" outcome="1" x="0.0" y="0.0" timestamp="2011-08-21T16:00:39.132" last_modified="2011-08-21T16:00:39">
      <Q id="1227488973" qualifier_id="127" value="Right to Left" />
    </Event>
    <Event id="1372839298" event_id="3" type_id="1" period_id="1" min="0" sec="1" player_id="37572" team_id="43" outcome="1" x="50.1" y="50.0" timestamp="2011-08-21T16:00:40.179" last_modified="2011-08-21T16:00:41">
      <Q id="541570642" qualifier_id="212" value="2.5" />
      <Q id="1202220317" qualifier_id="140" value="52.4" />
      <Q id="1922915435" qualifier_id="141" value="49.1" />
      <Q id="566075534" qualifier_id="56" value="Center" />
      <Q id="21729623" qualifier_id="213" value="6.0" />
    </Event>
    <Event id="978322590" event_id="4" type_id="1" period_id="1" min="0" sec="2" player_id="20664" team_id="43" outcome="1" x="48.2" y="49.1" timestamp="2011-08-21T16:00:41.585" last_modified="2011-08-21T16:00:44">
      <Q id="1842556548" qualifier_id="213" value="2.4" />
      <Q id="1580811978" qualifier_id="56" value="Back" />
      <Q id="1316578499" qualifier_id="140" value="29.0" />
      <Q id="870951602" qualifier_id="212" value="27.5" />
      <Q id="1361996302" qualifier_id="141" value="76.5" />
    </Event>
    <Event id="1572252644" event_id="3" type_id="7" period_id="1" min="0" sec="5" player_id="3630" team_id="30" outcome="1" x="66.4" y="13.7" timestamp="2011-08-21T16:00:44.304" last_modified="2011-08-21T16:38:51">
      <Q id="1492254907" qualifier_id="167" />
      <Q id="491183529" qualifier_id="56" value="Right" />
    </Event>
    <Event id="2077205137" event_id="5" type_id="3" period_id="1" min="0" sec="5" player_id="42593" team_id="43" outcome="0" x="28.3" y="86.1" timestamp="2011-08-21T16:00:44.304" last_modified="2011-08-22T12:29:34">
      <Q id="176412213" qualifier_id="56" value="Back" />
    </Event>
    <Event id="403196518" event_id="4" type_id="5" period_id="1" min="0" sec="7" player_id="3630" team_id="30" outcome="0" x="72.0" y="-1.1" timestamp="2011-08-21T16:00:46.279" last_modified="2011-08-21T16:00:46">
      <Q id="1046884000" qualifier_id="56" value="Right" />
    </Event>
    <Event id="509587597" event_id="6" type_id="5" period_id="1" min="0" sec="7" player_id="42593" team_id="43" outcome="1" x="25.9" y="101.2" timestamp="2011-08-21T16:00:46.444" last_modified="2011-08-21T16:00:58">
      <Q id="1487531325" qualifier_id="56" value="Back" />
    </Event>
    <Event id="1962550717" event_id="7" type_id="1" period_id="1" min="0" sec="19" player_id="42593" team_id="43" outcome="0" x="27.8" y="100.0" timestamp="2011-08-21T16:00:58.445" last_modified="2011-08-21T16:01:01">
      <Q id="1388961572" qualifier_id="56" value="Back" />
      <Q id="1464407097" qualifier_id="141" value="93.4" />
      <Q id="1290217595" qualifier_id="212" value="23.2" />
      <Q id="63165432" qualifier_id="107" />
      <Q id="1582662528" qualifier_id="213" value="6.1" />
      <Q id="742294617" qualifier_id="140" value="49.3" />
    </Event>
    <Event id="1886599927" event_id="5" type_id="1" period_id="1" min="0" sec="23" player_id="27696" team_id="30" outcome="1" x="50.9" y="20.0" timestamp="2011-08-21T16:01:02.466" last_modified="2011-08-21T16:01:17">
      <Q id="1358834683" qualifier_id="212" value="15.0" />
      <Q id="215350901" qualifier_id="140" value="63.6" />
      <Q id="1207838176" qualifier_id="3" />
      <Q id="1518806348" qualifier_id="141" value="30.1" />
      <Q id="71823979" qualifier_id="56" value="Center" />
      <Q id="922302198" qualifier_id="213" value="0.5" />
    </Event>
    <Event id="604657066" event_id="6" type_id="4" period_id="1" min="0" sec="26" player_id="3630" team_id="30" outcome="0" x="63.6" y="30.1" timestamp="2011-08-21T16:01:05.482" last_modified="2011-08-21T16:01:07">
      <Q id="554791351" qualifier_id="56" value="Center" />
      <Q id="1096167754" qualifier_id="13" />
    </Event>
    <Event id="497476041" event_id="8" type_id="4" period_id="1" min="0" sec="26" player_id="1632" team_id="43" outcome="1" x="31.7" y="75.8" timestamp="2011-08-21T16:01:05.507" last_modified="2011-08-21T16:01:06">
      <Q id="503664000" qualifier_id="13" />
      <Q id="956789322" qualifier_id="56" value="Back" />
    </Event>
    <Event id="854361320" event_id="9" type_id="1" period_id="1" min="0" sec="30" player_id="7551" team_id="43" outcome="1" x="29.3" y="73.0" timestamp="2011-08-21T16:01:09.507" last_modified="2011-08-21T16:01:11">
      <Q id="1681674682" qualifier_id="5" />
      <Q id="43769595" qualifier_id="212" value="11.9" />
      <Q id="1348926242" qualifier_id="213" value="4.8" />
      <Q id="1228266170" qualifier_id="140" value="30.2" />
      <Q id="2126971087" qualifier_id="56" value="Back" />
      <Q id="911744327" qualifier_id="141" value="55.6" />
    </Event>
    <Event id="959833686" event_id="10" type_id="1" period_id="1" min="0" sec="32" player_id="14664" team_id="43" outcome="1" x="31.9" y="53.6" timestamp="2011-08-21T16:01:11.663" last_modified="2011-08-21T16:01:13">
      <Q id="638330763" qualifier_id="140" value="42.3" />
      <Q id="816211408" qualifier_id="212" value="16.8" />
      <Q id="1375255370" qualifier_id="141" value="34.9" />
      <Q id="1175687287" qualifier_id="56" value="Back" />
      <Q id="1501159585" qualifier_id="213" value="5.4" />
    </Event>
    <Event id="1460827261" event_id="11" type_id="1" period_id="1" min="0" sec="33" player_id="20664" team_id="43" outcome="1" x="44.3" y="31.8" timestamp="2011-08-21T16:01:13.101" last_modified="2011-08-21T16:01:16">
      <Q id="1355697445" qualifier_id="213" value="5.1" />
      <Q id="1474043617" qualifier_id="140" value="51.8" />
      <Q id="491448639" qualifier_id="141" value="5.3" />
      <Q id="1696680483" qualifier_id="56" value="Right" />
      <Q id="1895256131" qualifier_id="212" value="19.7" />
    </Event>
    <Event id="834406608" event_id="12" type_id="1" period_id="1" min="0" sec="36" player_id="20492" team_id="43" outcome="1" x="52.1" y="5.3" timestamp="2011-08-21T16:01:15.992" last_modified="2011-08-21T16:01:20">
      <Q id="1242718866" qualifier_id="1" />
      <Q id="283189683" qualifier_id="141" value="17.6" />
      <Q id="1681189612" qualifier_id="213" value="0.2" />
      <Q id="193816719" qualifier_id="212" value="48.9" />
      <Q id="1022848643" qualifier_id="56" value="Right" />
      <Q id="431648281" qualifier_id="140" value="98.0" />


What I have tried:

I want to:

1) Search for the 'Game' tag with the findall method.
2) Inside that 'Game', search for all the 'Event' tags, again with the findall method
3) Implement a for loop over the events, and keep only those whose type_id attribute is equal to 1
4) Within each event, extract the fields we need and put them in the lists.
5) In addition, it is necessary to make a loop inside the loop to extract the qualifiers and to include them in the lists.
6) With the lists created, pass it all to a dataframe.


Step 1 is something like:

et_game = tree.findall("Game")


and step 2 is like this code, but with []

I don't know how to do the rest.

Thanks for all your support.
Posted
Updated 27-Oct-22 20:44pm
v2
Comments
Member 15627495 28-Oct-22 1:31am    
In an XML file (or XML content held in memory / in a variable)
you have a 'root'.
Within the 'root' you have 'elements', and 'elements' can have 'attribute(s)'.
'Element(s)' and 'attribute(s)' have a name and can have a 'value'.

When you use findall('Game'), you search the XML tree held in the 'tree' variable to retrieve all XML elements named 'Game'.
<root> <element1_name attribute1_name='attribute1_value' attribute2_without_name> element1_value </element1_name> </root>

Sometimes the root/nodes/sub-nodes terminology is used instead.

When you query XML, you walk through the hierarchy of elements and their values.

Could you please mention which library you use for Python and XML? I'll try to help you more.

https://docs.python.org/3/library/xml.etree.elementtree.html
David Vazquez Bande 28-Oct-22 2:05am    
I use this. Code:

import pandas as pd

import xml.etree.ElementTree as ET

tree = ET.parse("files/OptaF24.xml")
raiz = tree.getroot()


Later I create empty lists to fill with the different results for events with type_id=1, like:

team = []
time = []
minute = []
second = []
origenX = []
origenY = []
destinateX = []
destinateY = []
result = [] 



After that, I don't know how to continue.
Member 15627495 28-Oct-22 2:13am    
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" viewBox="0 0 460 460" style="width: 16px;height:16px;" xml:space="preserve">
<g>
<path d="M425.934,0H171.662c-18.122,0-32.864,14.743-32.864,32.864v77.134h30V32.864c0-1.579,1.285-2.864,2.864-2.864h254.272 c1.579,0,2.864,1.285,2.864,2.864v254.272c0,1.58-1.285,2.865-2.864,2.865h-74.729v30h74.729 c18.121,0,32.864-14.743,32.864-32.865V32.864C458.797,14.743,444.055,0,425.934,0z"/>
<path d="M288.339,139.998H34.068c-18.122,0-32.865,14.743-32.865,32.865v254.272C1.204,445.257,15.946,460,34.068,460h254.272 c18.122,0,32.865-14.743,32.865-32.864V172.863C321.206,154.741,306.461,139.998,288.339,139.998z M288.341,430H34.068 c-1.58,0-2.865-1.285-2.865-2.864V172.863c0-1.58,1.285-2.865,2.865-2.865h254.272c1.58,0,2.865,1.285,2.865,2.865v254.273h0.001 C291.206,428.715,289.92,430,288.341,430z"/>
</g>
</svg>

et_event = et_game.findAll("Event")

but with 'findall' the following will work :
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" viewBox="0 0 460 460" style="width: 16px;height:16px;" xml:space="preserve">
<g>
<path d="M425.934,0H171.662c-18.122,0-32.864,14.743-32.864,32.864v77.134h30V32.864c0-1.579,1.285-2.864,2.864-2.864h254.272 c1.579,0,2.864,1.285,2.864,2.864v254.272c0,1.58-1.285,2.865-2.864,2.865h-74.729v30h74.729 c18.121,0,32.864-14.743,32.864-32.865V32.864C458.797,14.743,444.055,0,425.934,0z"/>
<path d="M288.339,139.998H34.068c-18.122,0-32.865,14.743-32.865,32.865v254.272C1.204,445.257,15.946,460,34.068,460h254.272 c18.122,0,32.865-14.743,32.865-32.864V172.863C321.206,154.741,306.461,139.998,288.339,139.998z M288.341,430H34.068 c-1.58,0-2.865-1.285-2.865-2.864V172.863c0-1.58,1.285-2.865,2.865-2.865h254.272c1.58,0,2.865,1.285,2.865,2.865v254.273h0.001 C291.206,428.715,289.92,430,288.341,430z"/>
</g>
</svg>

et_event = tree.findall("Event") 


the findall goes 'global' , it's not so accurate for a starter request. It's tricky. kind of shortcut very powerful

With XML, you dig through a set of 'organized' data, with elements containing other elements. That is what question 2 is about.
David Vazquez Bande 28-Oct-22 3:09am    
But I already have the first line, and it uses "Game"; the next line (point 2) has to search for "Event" inside it.
Richard MacCutchan 28-Oct-22 4:52am    
I gave you a link to the ElementTree documentation yesterday. It contains details and samples of the functions you need to use.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900