R
Robin Becker
An email in (e-mail address removed) claimed that the following loop in a
charting module was taking a long time
however, when I tried an experiment in python 2.7 using the script below I find
that the looping algorithms perform better. A naive loop using list += list
would appear to be an O(n**2) operation, but python seems to be doing better
than that. Also why does the append version fail so dismally. Is my test coded
wrongly or is pre-allocation of the list making this better than expected?
C:\code\tests>tpoints 86000 860000
#################### START n=86000 ####################
existing algorithm took 0.08 seconds
existing algorithm using list took 0.12 seconds
existing algorithm using list assuming length 2 took 0.12 seconds
map(list,row) took 0.16 seconds
[list(xy) for xy in row] took 0.28 seconds
[[xy[0],xy[1]] for xy in row] took 0.22 seconds
append algorithm took 0.19 seconds
#################### END n=86000 ####################
#################### START n=860000 ####################
existing algorithm took 0.86 seconds
existing algorithm using list took 1.33 seconds
existing algorithm using list assuming length 2 took 1.25 seconds
map(list,row) took 3.44 seconds
[list(xy) for xy in row] took 3.03 seconds
[[xy[0],xy[1]] for xy in row] took 2.70 seconds
append algorithm took 2.48 seconds
#################### END n=860000 ####################
#########################################
import sys, time
def main(n):
print 20*'#','START n=%s'%n,20*'#'
row = [(i,i+1) for i in xrange(2*n)]
print 'existing algorithm',
t0 = time.time()
points = []
for xy in row:
points += [xy[0],xy[1]]
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 'existing algorithm using list',
t0 = time.time()
points = []
for xy in row:
points += list(xy[:2])
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 'existing algorithm using list assuming length 2',
t0 = time.time()
points = []
for xy in row:
points += list(xy)
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 'map(list,row)',
t0 = time.time()
points = map(list,row)
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print '[list(xy) for xy in row]',
t0 = time.time()
points = [list(xy) for xy in row]
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print '[[xy[0],xy[1]] for xy in row]',
t0 = time.time()
points = [[xy[0],xy[1]] for xy in row]
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 'append algorithm',
t0 = time.time()
points = [].append
for xy in row:
points([xy[0],xy[1]])
points = points.__self__
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 20*'#','END n=%s'%n,20*'#','\n\n'
if __name__=='__main__':
if len(sys.argv)==1:
N = [86000]
else:
N = map(int,sys.argv[1:])
for n in N:
main(n)
#########################################
charting module was taking a long time
I use ReportLab 2.6 but I also found the problem in ReportLab daily from 01/29/2013 in /src/reportlab/graphics/charts/lineplots.py:
276 # Iterate over data columns.
277 if self.joinedLines:
278 points = []
279 for xy in row:
280 points += [xy[0], xy[1]]
If I use a list comprehension instead, the plot is generated within seconds or minutes:
278 points = [[xy[0], xy[1]] for xy in row]
however, when I tried an experiment in python 2.7 using the script below I find
that the looping algorithms perform better. A naive loop using list += list
would appear to be an O(n**2) operation, but python seems to be doing better
than that. Also why does the append version fail so dismally. Is my test coded
wrongly or is pre-allocation of the list making this better than expected?
C:\code\tests>tpoints 86000 860000
#################### START n=86000 ####################
existing algorithm took 0.08 seconds
existing algorithm using list took 0.12 seconds
existing algorithm using list assuming length 2 took 0.12 seconds
map(list,row) took 0.16 seconds
[list(xy) for xy in row] took 0.28 seconds
[[xy[0],xy[1]] for xy in row] took 0.22 seconds
append algorithm took 0.19 seconds
#################### END n=86000 ####################
#################### START n=860000 ####################
existing algorithm took 0.86 seconds
existing algorithm using list took 1.33 seconds
existing algorithm using list assuming length 2 took 1.25 seconds
map(list,row) took 3.44 seconds
[list(xy) for xy in row] took 3.03 seconds
[[xy[0],xy[1]] for xy in row] took 2.70 seconds
append algorithm took 2.48 seconds
#################### END n=860000 ####################
#########################################
import sys, time
def main(n):
print 20*'#','START n=%s'%n,20*'#'
row = [(i,i+1) for i in xrange(2*n)]
print 'existing algorithm',
t0 = time.time()
points = []
for xy in row:
points += [xy[0],xy[1]]
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 'existing algorithm using list',
t0 = time.time()
points = []
for xy in row:
points += list(xy[:2])
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 'existing algorithm using list assuming length 2',
t0 = time.time()
points = []
for xy in row:
points += list(xy)
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 'map(list,row)',
t0 = time.time()
points = map(list,row)
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print '[list(xy) for xy in row]',
t0 = time.time()
points = [list(xy) for xy in row]
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print '[[xy[0],xy[1]] for xy in row]',
t0 = time.time()
points = [[xy[0],xy[1]] for xy in row]
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 'append algorithm',
t0 = time.time()
points = [].append
for xy in row:
points([xy[0],xy[1]])
points = points.__self__
t1 = time.time()
print 'took %.2f seconds' % (t1-t0)
print 20*'#','END n=%s'%n,20*'#','\n\n'
if __name__=='__main__':
if len(sys.argv)==1:
N = [86000]
else:
N = map(int,sys.argv[1:])
for n in N:
main(n)
#########################################