D
David C. Ullrich
Is there a csvlib out there somewhere?
And/or does anyone see any problems with
the code below?
What csvline does is straightforward: fields
is a list of strings. csvline(fields) returns
the strings concatenated into one string
separated by commas. Except that if a field
contains a comma, a double quote or a newline
then each double quote is escaped to a pair of
double quotes and the field is enclosed in
double quotes.
The part that seems somewhat hideous is
parsecsvline. The intention is that
parsecsvline(csvline(fields)) should be
the same as fields. Haven't attempted
to deal with parsecsvline(data) where
data is in an invalid format - in the
intended application data will always
be something that was returned by
csvline. It seems right after some
testing... also seems blechitudinous.
(Um: Believe it or not I'm _still_ using
python 1.5.7. So comments about iterators,
list comprehensions, string methods, etc
are irrelevant. Comments about errors in
the algorithm would be great. Thanks.)
The code:
from string import replace, join
def csvescape(s):
    """Return s encoded as a single CSV field.

    A string containing no comma, double quote or newline is returned
    unchanged.  Any other string has every double quote doubled and is
    then wrapped in a pair of double quotes.
    """
    # Plain 'in' tests and string.replace are used on purpose: they
    # keep this working on interpreters that lack string methods.
    for special in (',', '"', '\n'):
        if special in s:
            return '"' + replace(s, '"', '""') + '"'
    return s
def csvline(fields):
    """Return the strings in fields as one comma-separated CSV line.

    Each string is passed through csvescape() first, so a field that
    contains a comma, double quote or newline is quoted in the result.
    """
    escaped = []
    for field in fields:
        escaped.append(csvescape(field))
    return join(escaped, ',')
class indexedstring:
    """A string paired with a read position, used to scan CSV fields.

    s is the text being scanned; index is the position of the
    character that the scanning methods look at next.
    """

    def __init__(self, s):
        self.index = 0
        self.s = s

    def __len__(self):
        return len(self.s)

    def __getitem__(self, j):
        return self.s[j]

    def eos(self):
        """Tell whether the read position is at or beyond the end of the text."""
        return len(self.s) <= self.index

    def inc(self):
        """Advance the read position by one character."""
        self.index = self.index + 1

    def current(self):
        """Return the character at the read position."""
        return self.s[self.index]

    def lookahead(self):
        """Return the character after the read position without moving."""
        return self.s[self.index + 1]

    def next(self):
        """Advance by one character and return the character now current."""
        self.index = self.index + 1
        return self.s[self.index]

    def getfield(self):
        """Read the field starting at the read position.

        Returns None when the text is already exhausted.  A field that
        begins with a double quote is read by quotedfield(), any other
        field by rawfield().
        """
        if self.eos():
            return None
        if self.current() != '"':
            return self.rawfield()
        return self.quotedfield()

    def rawfield(self):
        """Consume an unquoted field and return it.

        The field extends to the next comma or to the end of the text.
        The read position ends up one past that comma (or one past the
        end of the text).
        """
        begin = self.index
        while not self.eos():
            if self.current() == ',':
                break
            self.inc()
        field = self.s[begin:self.index]
        # Step over the separator so the next read starts on a field.
        self.inc()
        return field

    def quotedfield(self):
        """Consume a quoted field and return its unescaped contents.

        Expects the read position to be on the opening double quote.
        The field ends at a double quote that is followed by a comma or
        by the end of the text; doubled double quotes inside the field
        are collapsed to single ones.  The read position ends up one
        past the character that follows the closing quote.
        """
        opening = self.index
        closed = 0
        while not closed:
            self.inc()
            if self.current() != '"':
                continue
            # A quote is either the first half of an escaped pair or
            # the closing quote; the character after it decides which.
            # Stepping onto that character also skips the second half
            # of an escaped pair.
            self.inc()
            closed = self.eos() or self.current() == ','
        # index now sits just after the closing quote, so drop both the
        # opening and the closing quote from the slice.
        body = self.s[opening + 1:self.index - 1]
        self.inc()
        return replace(body, '""', '"')
def parsecsvline(csvline):
    """Invert csvline(): split one CSV line back into its list of fields.

    Assumes csvline is valid, i.e. is something as returned by
    csvline(); the output is undefined if csvline is in an invalid
    format.

    The empty string parses to [].  csvline([]) and csvline(['']) both
    produce the empty string, so that one case cannot be inverted.
    """
    s = indexedstring(csvline)
    res = []
    while not s.eos():
        res.append(s.getfield())
    # Reading a field also consumes the comma after it, so when the
    # last field is empty the loop stops before seeing it.  In valid
    # input a field that itself ends in a comma is quoted and therefore
    # ends in '"', so a trailing comma can only be a separator followed
    # by an empty final field.
    if csvline[-1:] == ',':
        res.append('')
    return res
************************
David C. Ullrich
And/or does anyone see any problems with
the code below?
What csvline does is straightforward: fields
is a list of strings. csvline(fields) returns
the strings concatenated into one string
separated by commas. Except that if a field
contains a comma, a double quote or a newline
then each double quote is escaped to a pair of
double quotes and the field is enclosed in
double quotes.
The part that seems somewhat hideous is
parsecsvline. The intention is that
parsecsvline(csvline(fields)) should be
the same as fields. Haven't attempted
to deal with parsecsvline(data) where
data is in an invalid format - in the
intended application data will always
be something that was returned by
csvline. It seems right after some
testing... also seems blechitudinous.
(Um: Believe it or not I'm _still_ using
python 1.5.7. So comments about iterators,
list comprehensions, string methods, etc
are irrelevant. Comments about errors in
the algorithm would be great. Thanks.)
The code:
from string import replace, join
def csvescape(s):
    """Return s encoded as a single CSV field.

    A string containing no comma, double quote or newline is returned
    unchanged.  Any other string has every double quote doubled and is
    then wrapped in a pair of double quotes.
    """
    # Plain 'in' tests and string.replace are used on purpose: they
    # keep this working on interpreters that lack string methods.
    for special in (',', '"', '\n'):
        if special in s:
            return '"' + replace(s, '"', '""') + '"'
    return s
def csvline(fields):
    """Return the strings in fields as one comma-separated CSV line.

    Each string is passed through csvescape() first, so a field that
    contains a comma, double quote or newline is quoted in the result.
    """
    escaped = []
    for field in fields:
        escaped.append(csvescape(field))
    return join(escaped, ',')
class indexedstring:
    """A string paired with a read position, used to scan CSV fields.

    s is the text being scanned; index is the position of the
    character that the scanning methods look at next.
    """

    def __init__(self, s):
        self.index = 0
        self.s = s

    def __len__(self):
        return len(self.s)

    def __getitem__(self, j):
        return self.s[j]

    def eos(self):
        """Tell whether the read position is at or beyond the end of the text."""
        return len(self.s) <= self.index

    def inc(self):
        """Advance the read position by one character."""
        self.index = self.index + 1

    def current(self):
        """Return the character at the read position."""
        return self.s[self.index]

    def lookahead(self):
        """Return the character after the read position without moving."""
        return self.s[self.index + 1]

    def next(self):
        """Advance by one character and return the character now current."""
        self.index = self.index + 1
        return self.s[self.index]

    def getfield(self):
        """Read the field starting at the read position.

        Returns None when the text is already exhausted.  A field that
        begins with a double quote is read by quotedfield(), any other
        field by rawfield().
        """
        if self.eos():
            return None
        if self.current() != '"':
            return self.rawfield()
        return self.quotedfield()

    def rawfield(self):
        """Consume an unquoted field and return it.

        The field extends to the next comma or to the end of the text.
        The read position ends up one past that comma (or one past the
        end of the text).
        """
        begin = self.index
        while not self.eos():
            if self.current() == ',':
                break
            self.inc()
        field = self.s[begin:self.index]
        # Step over the separator so the next read starts on a field.
        self.inc()
        return field

    def quotedfield(self):
        """Consume a quoted field and return its unescaped contents.

        Expects the read position to be on the opening double quote.
        The field ends at a double quote that is followed by a comma or
        by the end of the text; doubled double quotes inside the field
        are collapsed to single ones.  The read position ends up one
        past the character that follows the closing quote.
        """
        opening = self.index
        closed = 0
        while not closed:
            self.inc()
            if self.current() != '"':
                continue
            # A quote is either the first half of an escaped pair or
            # the closing quote; the character after it decides which.
            # Stepping onto that character also skips the second half
            # of an escaped pair.
            self.inc()
            closed = self.eos() or self.current() == ','
        # index now sits just after the closing quote, so drop both the
        # opening and the closing quote from the slice.
        body = self.s[opening + 1:self.index - 1]
        self.inc()
        return replace(body, '""', '"')
def parsecsvline(csvline):
    """Invert csvline(): split one CSV line back into its list of fields.

    Assumes csvline is valid, i.e. is something as returned by
    csvline(); the output is undefined if csvline is in an invalid
    format.

    The empty string parses to [].  csvline([]) and csvline(['']) both
    produce the empty string, so that one case cannot be inverted.
    """
    s = indexedstring(csvline)
    res = []
    while not s.eos():
        res.append(s.getfield())
    # Reading a field also consumes the comma after it, so when the
    # last field is empty the loop stops before seeing it.  In valid
    # input a field that itself ends in a comma is quoted and therefore
    # ends in '"', so a trailing comma can only be a separator followed
    # by an empty final field.
    if csvline[-1:] == ',':
        res.append('')
    return res
************************
David C. Ullrich