A
Albert-jan Roskam
Hi,
I wrote the program below to merge all xls files in a given directory into one multisheet xls file. It uses xlwt and xlrd. The xls files I use for input are generated by Spss. When I open and re-save the files in Excel, the program works, but when I use the xls files as they were created by Spss, the program returns an error message (see below). Some details: Python 2.4, xlwt version 0.7.0, xlrd version 0.5.2, Win NT.
Any ideas?
Thanks in advance!
Albert-Jan
"""
Merge all xls files in a given directory into one multisheet xls file.
The sheets get the orginal file name, without the extension.
File names should not exceed 31 characters, as this is the maximum
sheet name length
"""
import xlrd, xlwt
import glob, os.path
def merge_xls (in_dir, out_file="d:/merged_output.xls"):
xls_files = glob.glob(in_dir + "*.xls")
sheet_names = []
merged_book = xlwt.Workbook()
[sheet_names.append(os.path.basename(v)[:-4]) for k, v in enumerate(xls_files)]
for k, xls_file in enumerate(xls_files):
if len (sheet_names[k]) <= 31:
book = xlrd.open_workbook(xls_file)
ws = merged_book.add_sheet(sheet_names[k])
for sheetx in range(book.nsheets):
sheet = book.sheet_by_index(sheetx)
for rx in range(sheet.nrows):
for cx in range(sheet.ncols):
ws.write(rx, cx, sheet.cell_value(rx, cx))
else:
print "File name too long: <%s.xls> (maximum is 31 chars) " % (sheet_names[k])
print "File <%s.xls> is *not* included in the merged xls file." % (sheet_names[k])
merged_book.save(out_file)
print "---> Merged xls file written to %s using the following source files: " % (out_file)
for k, v in enumerate(sheet_names):
if len(v) <= 31: print "\t", str(k+1).zfill(3), "%s.xls" % (v)
merge_xls(in_dir="d:/temp/")
*** WARNING: OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero ***
put_cell 0 1
Traceback (most recent call last):
File "G:\generic_syntaxes\merge_xls.py", line 37, in -toplevel-
merge_xls(in_dir="d:/temp/")
File "G:\generic_syntaxes\merge_xls.py", line 21, in merge_xls
book = xlrd.open_workbook(xls_file)
File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 139, in open_workbook
bk.get_sheets()
File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 389, in get_sheets
sht = self.get_sheet(sheetno)
File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 379, in get_sheet
sh.read(self)
File "C:\Python24\lib\site-packages\xlrd\sheet.py", line 285, in read
self.put_cell(rowx, colx, XL_CELL_TEXT, bk._sharedstrings[index])
File "C:\Python24\lib\site-packages\xlrd\sheet.py", line 214, in put_cell
self._cell_types[rowx][colx] = ctype
IndexError: list assignment index out of range
I wrote the program below to merge all xls files in a given directory into one multisheet xls file. It uses xlwt and xlrd. The xls files I use for input are generated by Spss. When I open and re-save the files in Excel, the program works, but when I use the xls files as they were created by Spss, the program returns an error message (see below). Some details: Python 2.4, xlwt version 0.7.0, xlrd version 0.5.2, Win NT.
Any ideas?
Thanks in advance!
Albert-Jan
"""
Merge all xls files in a given directory into one multisheet xls file.
The sheets get the orginal file name, without the extension.
File names should not exceed 31 characters, as this is the maximum
sheet name length
"""
import xlrd, xlwt
import glob, os.path
def merge_xls (in_dir, out_file="d:/merged_output.xls"):
xls_files = glob.glob(in_dir + "*.xls")
sheet_names = []
merged_book = xlwt.Workbook()
[sheet_names.append(os.path.basename(v)[:-4]) for k, v in enumerate(xls_files)]
for k, xls_file in enumerate(xls_files):
if len (sheet_names[k]) <= 31:
book = xlrd.open_workbook(xls_file)
ws = merged_book.add_sheet(sheet_names[k])
for sheetx in range(book.nsheets):
sheet = book.sheet_by_index(sheetx)
for rx in range(sheet.nrows):
for cx in range(sheet.ncols):
ws.write(rx, cx, sheet.cell_value(rx, cx))
else:
print "File name too long: <%s.xls> (maximum is 31 chars) " % (sheet_names[k])
print "File <%s.xls> is *not* included in the merged xls file." % (sheet_names[k])
merged_book.save(out_file)
print "---> Merged xls file written to %s using the following source files: " % (out_file)
for k, v in enumerate(sheet_names):
if len(v) <= 31: print "\t", str(k+1).zfill(3), "%s.xls" % (v)
merge_xls(in_dir="d:/temp/")
*** WARNING: OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero ***
put_cell 0 1
Traceback (most recent call last):
File "G:\generic_syntaxes\merge_xls.py", line 37, in -toplevel-
merge_xls(in_dir="d:/temp/")
File "G:\generic_syntaxes\merge_xls.py", line 21, in merge_xls
book = xlrd.open_workbook(xls_file)
File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 139, in open_workbook
bk.get_sheets()
File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 389, in get_sheets
sht = self.get_sheet(sheetno)
File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 379, in get_sheet
sh.read(self)
File "C:\Python24\lib\site-packages\xlrd\sheet.py", line 285, in read
self.put_cell(rowx, colx, XL_CELL_TEXT, bk._sharedstrings[index])
File "C:\Python24\lib\site-packages\xlrd\sheet.py", line 214, in put_cell
self._cell_types[rowx][colx] = ctype
IndexError: list assignment index out of range