Amy G said:
I have seen something about this before on this forum, but my Google search
didn't come up with the answer I am looking for.
I have a list of tuples. Each tuple is in the following format:
("data", "moredata", "evenmoredata", "date string")
The date string is my concern. This is the date stamp from an email.
The problem is that I have a whole bunch of variations when it comes to the
format that the date string is in. For example I could have the following
two tuples:
("data", "moredata", "evenmoredata", "Fri, 23 Jan 2004 00:06:15")
("data", "moredata", "evenmoredata", "Thursday, 22 January 2004 03:15:06")
I know there is some way to use the date string from each of these to get a
date usable by Python, but I cannot figure it out.
I was trying to use time.strptime but have been unsuccessful thus far.
Any help is appreciated.
This is what I use to parse dates of unknown provenance.
It's laughably overengineered, and I don't include the day
of the week or the time. Given your examples, though,
those should be easy enough to deal with.
HTH
John Roth
class DateContainer(object):
    """Hold a calendar date settable from a free-form or an ISO-style string.

    Two properties expose the date:

    * ``stringValue`` -- the "user friendly" text exactly as supplied.
      Assigning to it parses the text and updates ``year``, ``month``,
      ``day`` and ``bc``.
    * ``value`` -- the date rendered as ``YYYY-MM-DD`` followed by the
      epoch when one is stored (e.g. ``0044-03-15 BC``).

    ``bc`` holds the epoch marker; "AD" and "CE" are normalised to the
    empty string because they are the default.

    Parsing of user friendly dates relies on a module-level
    ``dateTimeParse`` function that is not defined in this class.
    NOTE(review): it presumably returns a mutable list of string tokens --
    confirm against its definition.
    """

    # Epoch markers recognised in both input formats.
    _EPOCHS = ("AD", "BC", "CE", "BCE")
    # Three-letter month prefixes; position + 1 is the month number.
    _MONTHS = ("JAN", "FEB", "MAR", "APR", "MAY", "JUN",
               "JUL", "AUG", "SEP", "OCT", "NOV", "DEC")

    _typeDict = {"stringValue": "String", "value": "String"}
    _stringValue = ""
    _value = ""
    year = 1
    month = 1
    day = 1
    bc = ""

    def _checkUserFriendlyDate(self, date):
        """Parse a user friendly date and store its components on self.

        The rules for a user friendly date are:

        1. The year must be at least three digits, including leading
           zeroes if necessary.  Day and numeric month may be no longer
           than 2 digits.
        2. The month may be alphabetic or numeric.  If it's alphabetic,
           it must be at least three letters long.
        3. The epoch may be ad, bc, bce or ce.  If omitted, it's assumed
           to be ad.
        4. After removing the year, epoch and an alphabetic month, the
           remaining single piece is the day, or the piece that is
           greater than 12.
        5. If two pieces remain, the first is the month, the second is
           the day.  Both are between 1 and 12, inclusive.

        Returns True on success.  Raises ValueError when the text does not
        break down into exactly year, month and day (plus optional epoch)
        or when a component is out of range.
        """
        partList = dateTimeParse(date)
        if not (2 < len(partList) < 5):
            raise ValueError("incorrect part list: %s" % (partList,))
        bc = self._findBC(partList)
        if len(partList) != 3:
            # Previously an error string was returned here, which the
            # property setter ignored, so a malformed date was accepted.
            raise ValueError(
                "wrong number of components in date: '%s'" % date)
        year = self._findYear(partList)
        month = self._findAlphaMonth(partList)
        if month != 0:
            # Alphabetic month consumed: the one piece left is the day.
            day = partList[0]
        else:
            day = self._findDay(partList)
            if day:
                # A piece > 12 can only be the day; the other is the month.
                month = partList[0]
            else:
                # Ambiguous numeric pair: rule 5, month first then day.
                month, day = partList
        year = self._checkNum(year, 4712)
        day = self._checkNum(day, 31, 1)
        month = self._checkNum(month, 12, 1)
        if bc in ("AD", "CE"):
            bc = ""
        self.year, self.month, self.day, self.bc = year, month, day, bc
        return True

    def _checkNum(self, num, limit, minimum=0):
        """Convert num to int and check that minimum <= value <= limit.

        Raises ValueError if num is not an integer literal or lies outside
        the permitted range.
        """
        result = int(num)
        if result > limit:
            raise ValueError("number '%s' out of range '%s'" % (num, limit))
        if result < minimum:
            raise ValueError(
                "number '%s' below minimum '%s'" % (num, minimum))
        return result

    def _findBC(self, partList):
        """Remove and return the first epoch marker in partList.

        The marker is returned upper-cased; "" is returned (and partList
        left untouched) when no marker is present.
        """
        for i, word in enumerate(partList):
            marker = word.upper()
            if marker in self._EPOCHS:
                # Safe to delete while iterating: we return immediately.
                del partList[i]
                return marker
        return ""

    def _findYear(self, partList):
        """Remove and return the first all-digit piece longer than 2 chars.

        Raises ValueError when partList contains no such piece.
        """
        for i, word in enumerate(partList):
            if len(word) > 2 and word.isdigit():
                del partList[i]
                return word
        raise ValueError("no year found in %s" % (partList,))

    def _findAlphaMonth(self, partList):
        """Remove the first alphabetic piece and return its month number.

        Only the first three letters are compared, case-insensitively.
        Returns 0 when partList has no alphabetic piece.  Raises
        ValueError when the alphabetic piece is not a month name.
        """
        for i, word in enumerate(partList):
            if word.isalpha():
                del partList[i]
                prefix = word[:3].upper()
                if prefix not in self._MONTHS:
                    raise ValueError("unknown month name: '%s'" % word)
                return self._MONTHS.index(prefix) + 1
        return 0

    def _findDay(self, partList):
        """Remove and return the first numeric piece greater than 12.

        Such a piece cannot be a month, so it must be the day.  Returns ""
        when every piece could still be a month.
        """
        for i, word in enumerate(partList):
            if word.isdigit() and int(word) > 12:
                del partList[i]
                return word
        return ""

    def _getStringValue(self):
        """Return the user friendly text most recently assigned."""
        return self._stringValue

    def _setStringValue(self, value):
        """Validate value as a user friendly date, then remember it."""
        self._checkUserFriendlyDate(value)
        self._stringValue = value

    stringValue = property(_getStringValue, _setStringValue,
                           doc="User Friendly Date")

    def _getValue(self):
        """Return the stored date as 'YYYY-MM-DD' plus epoch, if any."""
        isoDate = "%04d-%02d-%02d %s" % (self.year, self.month, self.day,
                                         self.bc)
        # strip() drops the trailing blank left when bc is empty.
        return isoDate.strip()

    def _checkISODate(self, value):
        """Parse 'YYYY-MM-DD[ epoch]' and store its components on self.

        The separator characters are not inspected; only the fixed column
        positions are read.  Raises ValueError on a non-numeric or
        out-of-range field or on an unrecognised epoch.
        """
        year = self._checkNum(value[:4], 4712)
        month = self._checkNum(value[5:7], 12, 1)
        day = self._checkNum(value[8:10], 31, 1)
        # Default to "" so bc is always bound, even without a suffix.
        bc = value[11:].strip().upper()
        if bc and bc not in self._EPOCHS:
            raise ValueError("unknown epoch: '%s'" % bc)
        if bc in ("AD", "CE"):
            bc = ""
        self.year, self.month, self.day, self.bc = year, month, day, bc
        return

    # Original public spelling, kept so existing callers continue to work.
    checkISODate = _checkISODate

    def _setValue(self, value):
        """Set the date from an ISO-style string and refresh stringValue."""
        self._checkISODate(value)
        isoDate = "%04d-%02d-%02d %s" % (self.year, self.month, self.day,
                                         self.bc)
        self.stringValue = isoDate
        return None

    value = property(_getValue, _setValue,
                     doc="ISO Standard Format Date")