A possible solution is to first load the CSV into pandas and then convert it row by row into XML, like so:
import pandas as pd
df = pd.read_csv('untitled.txt', sep='|')
With the sample data (assuming separator and so on) loaded as:
Title Type Format Year Rating Stars \
0 Enemy Behind War,Thriller DVD 2003 PG 10
1 Transformers Anime,Science Fiction DVD 1989 R 9
Description
0 Talk about...
1 A Schientific fiction
And then converting to xml with a custom function:
def convert_row(row):
    """Render one movie record as a ``<movie>`` XML element.

    Args:
        row: An object exposing ``Title``, ``Type``, ``Format``, ``Year``,
            ``Rating``, ``Stars`` and ``Description`` attributes, such as
            the row handed over by ``DataFrame.apply(..., axis=1)``.

    Returns:
        The element as a string with one child element per line. The title
        is stored in the ``title`` attribute of ``<movie>``.
    """
    # Imported locally so the snippet stays self-contained.
    from xml.sax.saxutils import escape, quoteattr

    children = (
        ("type", row.Type),
        ("format", row.Format),
        ("year", row.Year),
        ("rating", row.Rating),
        ("stars", row.Stars),
        ("description", row.Description),
    )
    # quoteattr/escape keep the output well-formed when a value contains
    # markup characters such as "&", "<" or a double quote.
    lines = ["<movie title=%s>" % quoteattr(str(row.Title))]
    lines.extend(
        "<%s>%s</%s>" % (tag, escape(str(value)), tag)
        for tag, value in children
    )
    lines.append("</movie>")
    return "\n".join(lines)
# Build one XML fragment per DataFrame row and print them newline-separated.
# print() is called as a function so the snippet also runs on Python 3.
print('\n'.join(df.apply(convert_row, axis=1)))
This way you get a string containing the xml:
<movie title="Enemy Behind">
<type>War,Thriller</type>
<format>DVD</format>
<year>2003</year>
<rating>PG</rating>
<stars>10</stars>
<description>Talk about...</description>
</movie>
<movie title="Transformers">
<type>Anime,Science Fiction</type>
<format>DVD</format>
<year>1989</year>
<rating>R</rating>
<stars>9</stars>
<description>A Schientific fiction</description>
</movie>
that you can dump into a file or whatever.
Inspired by this great answer.
Edit: Using the loading method you posted (or a version that actually loads the data to a variable):
import csv
# Load every CSV record into memory. The ``with`` block closes the file even
# if reading fails, and newline='' is what the csv module asks for so that
# newlines embedded in quoted fields are handled correctly.
with open('movies2.csv', newline='') as f:
    data = list(csv.reader(f))

# Skip the first row (presumably the header) and show only the records.
print(data[1:])
We get:
[['Enemy Behind', 'War', 'Thriller', 'DVD', '2003', 'PG', '10', 'Talk about...'], ['Transformers', 'Anime', 'Science Fiction', 'DVD', '1989', 'R', '9', 'A Schientific fiction']]
And we can convert to XML with minor modifications:
def convert_row(row):
    """Render one CSV record as a ``<movie>`` XML element.

    Args:
        row: A sequence whose first seven items are used, in order, as the
            title, type, format, year, rating, stars and description. Any
            further items are ignored.

    Returns:
        The element as a string with one child element per line. The first
        item is stored in the ``title`` attribute of ``<movie>``.

    Raises:
        IndexError: If ``row`` has fewer than seven items.
    """
    # Imported locally so the snippet stays self-contained.
    from xml.sax.saxutils import escape, quoteattr

    child_tags = ("type", "format", "year", "rating", "stars", "description")
    # quoteattr/escape keep the output well-formed when a value contains
    # markup characters such as "&", "<" or a double quote.
    lines = ["<movie title=%s>" % quoteattr(str(row[0]))]
    for position, tag in enumerate(child_tags, start=1):
        lines.append("<%s>%s</%s>" % (tag, escape(str(row[position])), tag))
    lines.append("</movie>")
    return "\n".join(lines)
# Convert every record after the first row (presumably the header) and print
# the fragments newline-separated. print() is called as a function so the
# snippet also runs on Python 3.
print('\n'.join(convert_row(row) for row in data[1:]))
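Getting similar results (note that csv.reader splits the unquoted "War,Thriller" field into two columns, so every value after the title lands one tag later than in the pandas version):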
<movie title="Enemy Behind">
<type>War</type>
<format>Thriller</format>
<year>DVD</year>
<rating>2003</rating>
<stars>PG</stars>
<description>10</description>
</movie>
<movie title="Transformers">
<type>Anime</type>
<format>Science Fiction</format>
<year>DVD</year>
<rating>1989</rating>
<stars>R</stars>
<description>9</description>
</movie>
Answer from robertoia on Stack Overflow
A possible solution is to first load the CSV into pandas and then convert it row by row into XML, like so:
import pandas as pd
df = pd.read_csv('untitled.txt', sep='|')
With the sample data (assuming separator and so on) loaded as:
Title Type Format Year Rating Stars \
0 Enemy Behind War,Thriller DVD 2003 PG 10
1 Transformers Anime,Science Fiction DVD 1989 R 9
Description
0 Talk about...
1 A Schientific fiction
And then converting to xml with a custom function:
def convert_row(row):
    """Render one movie record as a ``<movie>`` XML element.

    Args:
        row: An object exposing ``Title``, ``Type``, ``Format``, ``Year``,
            ``Rating``, ``Stars`` and ``Description`` attributes, such as
            the row handed over by ``DataFrame.apply(..., axis=1)``.

    Returns:
        The element as a string with one child element per line. The title
        is stored in the ``title`` attribute of ``<movie>``.
    """
    # Imported locally so the snippet stays self-contained.
    from xml.sax.saxutils import escape, quoteattr

    children = (
        ("type", row.Type),
        ("format", row.Format),
        ("year", row.Year),
        ("rating", row.Rating),
        ("stars", row.Stars),
        ("description", row.Description),
    )
    # quoteattr/escape keep the output well-formed when a value contains
    # markup characters such as "&", "<" or a double quote.
    lines = ["<movie title=%s>" % quoteattr(str(row.Title))]
    lines.extend(
        "<%s>%s</%s>" % (tag, escape(str(value)), tag)
        for tag, value in children
    )
    lines.append("</movie>")
    return "\n".join(lines)
# Build one XML fragment per DataFrame row and print them newline-separated.
# print() is called as a function so the snippet also runs on Python 3.
print('\n'.join(df.apply(convert_row, axis=1)))
This way you get a string containing the xml:
<movie title="Enemy Behind">
<type>War,Thriller</type>
<format>DVD</format>
<year>2003</year>
<rating>PG</rating>
<stars>10</stars>
<description>Talk about...</description>
</movie>
<movie title="Transformers">
<type>Anime,Science Fiction</type>
<format>DVD</format>
<year>1989</year>
<rating>R</rating>
<stars>9</stars>
<description>A Schientific fiction</description>
</movie>
that you can dump into a file or whatever.
Inspired by this great answer.
Edit: Using the loading method you posted (or a version that actually loads the data to a variable):
import csv
# Load every CSV record into memory. The ``with`` block closes the file even
# if reading fails, and newline='' is what the csv module asks for so that
# newlines embedded in quoted fields are handled correctly.
with open('movies2.csv', newline='') as f:
    data = list(csv.reader(f))

# Skip the first row (presumably the header) and show only the records.
print(data[1:])
We get:
[['Enemy Behind', 'War', 'Thriller', 'DVD', '2003', 'PG', '10', 'Talk about...'], ['Transformers', 'Anime', 'Science Fiction', 'DVD', '1989', 'R', '9', 'A Schientific fiction']]
And we can convert to XML with minor modifications:
def convert_row(row):
    """Render one CSV record as a ``<movie>`` XML element.

    Args:
        row: A sequence whose first seven items are used, in order, as the
            title, type, format, year, rating, stars and description. Any
            further items are ignored.

    Returns:
        The element as a string with one child element per line. The first
        item is stored in the ``title`` attribute of ``<movie>``.

    Raises:
        IndexError: If ``row`` has fewer than seven items.
    """
    # Imported locally so the snippet stays self-contained.
    from xml.sax.saxutils import escape, quoteattr

    child_tags = ("type", "format", "year", "rating", "stars", "description")
    # quoteattr/escape keep the output well-formed when a value contains
    # markup characters such as "&", "<" or a double quote.
    lines = ["<movie title=%s>" % quoteattr(str(row[0]))]
    for position, tag in enumerate(child_tags, start=1):
        lines.append("<%s>%s</%s>" % (tag, escape(str(row[position])), tag))
    lines.append("</movie>")
    return "\n".join(lines)
# Convert every record after the first row (presumably the header) and print
# the fragments newline-separated. print() is called as a function so the
# snippet also runs on Python 3.
print('\n'.join(convert_row(row) for row in data[1:]))
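Getting similar results (note that csv.reader splits the unquoted "War,Thriller" field into two columns, so every value after the title lands one tag later than in the pandas version):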
<movie title="Enemy Behind">
<type>War</type>
<format>Thriller</format>
<year>DVD</year>
<rating>2003</rating>
<stars>PG</stars>
<description>10</description>
</movie>
<movie title="Transformers">
<type>Anime</type>
<format>Science Fiction</format>
<year>DVD</year>
<rating>1989</rating>
<stars>R</stars>
<description>9</description>
</movie>
I tried to generalize robertoia's function convert_row for any header instead of writing it by hand.
import csv
import pandas as pd
# Read all CSV records into memory. The ``with`` block closes the file even
# if reading fails, and newline='' is what the csv module asks for.
with open('movies2.csv', newline='') as f:
    data = list(csv.reader(f))

# Parse the same file with pandas to obtain the column names, which serve
# as the XML tag names.
df = pd.read_csv('movies2.csv')
header = list(df.columns)
def convert_row(row, columns=None):
    """Render one record as XML, using the column names as tag names.

    The first column becomes the wrapping element: its value is written
    right after the opening tag, and every remaining column is emitted as a
    child element on its own line.

    Args:
        row: Sequence of values, positionally aligned with the columns.
        columns: Optional sequence of column names. When omitted, the
            module-level ``header`` list is used.

    Returns:
        The XML fragment for this record as a string.

    Raises:
        IndexError: If there are no columns, or ``row`` has fewer values
            than there are columns.
    """
    # Imported locally so the snippet stays self-contained.
    from xml.sax.saxutils import escape

    names = header if columns is None else columns
    if len(row) < len(names):
        raise IndexError(
            "row has %d values but %d columns were expected"
            % (len(row), len(names))
        )

    wrapper = names[0]
    # escape() keeps the output well-formed when a value contains "&" or "<".
    parts = ["<%s>%s\n" % (wrapper, escape(str(row[0])))]
    parts.extend(
        "<%s>%s</%s> \n" % (name, escape(str(value)), name)
        for name, value in zip(names[1:], row[1:])
    )
    parts.append("</%s>" % wrapper)
    return "".join(parts)
# Wrap the per-record fragments (every row after the first) in a single
# <collection> root element.
opening_tag = """<collection shelf="New Arrivals">"""
closing_tag = "</collection >"
fragments = '\n'.join(convert_row(row) for row in data[1:])
text = "\n".join((opening_tag, fragments, closing_tag))
print(text)

# Save the same document to disk.
with open('output.xml', 'w') as myfile:
    myfile.write(text)
Of course, with recent versions of pandas it is simpler to just use to_xml():
df= pd.read_csv('movies2.csv')
with open('outputf.xml', 'w') as myfile:
myfile.write(df.to_xml())
Python CSV to XML converter - Code Review Stack Exchange
Read xml column inside csv file with Python
Python convert CSV to XML with pandas groupby - Stack Overflow
Convert CSV to XML by LXML & Pandas in python - Stack Overflow
Videos
Using pandas and BeautifulSoup you can achieve your expected output easily:
#Code:
import pandas as pd
import itertools
from bs4 import BeautifulSoup as b
with open("file.xml", "r") as f:  # read the whole XML document
    content = f.read()

soup = b(content, "lxml")


def _tag_texts(tag_name):
    """Return the text of every ``tag_name`` element found in ``soup``."""
    # find_all is the current name of the deprecated findAll alias.
    return [element.text for element in soup.find_all(tag_name)]


pkgeid = _tag_texts("pkgeid")
pkgname = _tag_texts("pkgname")
time = _tag_texts("time")
oper = _tag_texts("oper")

# zip_longest pads the shorter lists with None, so a column with fewer
# values still yields a full set of rows.
# On Python 3 use itertools.zip_longest; on Python 2 the equivalent is
# itertools.izip_longest.
data = list(itertools.zip_longest(time, oper, pkgeid, pkgname))

df = pd.DataFrame(data=data)
df.to_csv("sample.csv", index=False, header=None)
#output in `sample.csv` file will be as follows:
2015-09-16T04:13:20Z,Create_Product,10,BBCWRL
2015-09-16T04:13:20Z,Create_Product,18,CNNINT
2018-04-01T03:30:28Z,Deactivate_Dhct,,
Using Pandas, parsing all xml fields.
import xml.etree.ElementTree as ET
import pandas as pd
# Parse the document; each child of the root is treated as one record.
tree = ET.parse("file.xml")
root = tree.getroot()

# One dict per record, mapping each sub-element's tag to its text. Iterating
# an Element yields its children in document order.
records = [{field.tag: field.text for field in record} for record in root]

df = pd.DataFrame.from_dict(records)
df.to_csv('file.csv')
Why not use df.to_xml()? This simple API generates XML directly from the DataFrame.
ElementTree can do it without any trouble. See below:
from collections import defaultdict
from xml.etree.ElementTree import Element, SubElement,ElementTree
# Group the second field of every line by the first field
# (used below as the coupon id and its codes).
data = defaultdict(list)
with open('in.txt') as f:
    # Skip the first line (presumably a header); the default avoids a
    # StopIteration when the file is empty.
    next(f, None)
    for line in f:
        if not line.strip():
            continue  # ignore blank lines instead of failing on parts[1]
        parts = line.split(',')
        data[parts[0]].append(parts[1].strip())

# Build <root><coupon-codes coupon-id="..."><code>...</code>...</coupon-codes>...</root>.
root = Element('root')
for coupon_id, codes in data.items():
    sub = SubElement(root, 'coupon-codes', attrib={'coupon-id': coupon_id})
    for code in codes:
        SubElement(sub, 'code').text = code

tree = ElementTree(root)
with open('out.xml', 'w') as f:
    # encoding='unicode' makes write() emit str, matching the text-mode file.
    tree.write(f, encoding='unicode')