(e-mail address removed) a écrit :
Problem:
You have a list of unknown length,
This doesn't exist in Python:
len(alist)
such as this: list =
[X,X,X,O,O,O,O]. You want to extract all and only the X's.
braindead solution - relying on zeros being zeros or any other False value:
all_xxx = filter(None, [X,X,X,O,O,O,O])
You know
the X's are all up front and you know that the item after the last X is
an O, or that the list ends with an X. There are never O's between
X's.
Then it *may* be possible to optimize - but code will break as soon as
this rule change. So is there a real need for optimisation ? (hint: dont
guess, profile)
FWIW, I've collected all suggestions here (and perhaps some others) and
hacked a Q&D benchmark. Usage is:
python test_lists.py [repeat [list_size [xcount]]
where:
* repeat is the number of iteration, default to 1000
* list_size is the size of the list, default to 100
* xcount is the number or non-zero values in the list, default is random
I've run it a few times, and it seems that in most cases,
* the bisect-like approach (Alex Martelli / Karl Friedrich Bolz) is
the winner
* the while/pop approach of the OP (slightly rewritten...) is really wrong
FWIW, one of the dummiest possible approach IMHO
(ie filter(None, alist)) behaves quite correctly wrt some other
'optimized' approachs - and is still safe if the rules about the
composition of the list changes... Could it be that no optimization is
sometime the best optimisation ?-)
# test_lists.py
from itertools import takewhile
from timeit import Timer
from random import randint
def get_list(size, xcount=None):
if xcount is None:
xcount = randint(1, size)
else:
assert xcount <= size
zcount = size - xcount
return [1] * xcount + [0] * zcount
def with_while(alist):
res = []
while alist and alist[0]:
res.append(alist.pop(0))
return res
def with_for(alist):
res = []
for x in alist:
if not x: break
res.append(x)
return res
def with_filter(alist):
return filter(None, alist)
def with_comprehension(alist):
return [x for x in alist if x]
def with_takewhile(alist):
return list(takewhile(lambda x: x!=0, alist))
def with_slice_try(alist):
try:
return alist[:alist.index(0)]
except ValueError:
return alist[:]
def with_slice_safe(alist):
alist.append(0)
return alist[:alist.index(0)]
def with_delslice_safe(alist):
alist.append(0)
del alist[alist.index(0):]
return alist
def with_sect(alist):
low = 0
high = len(alist)
while low < high:
mid = (low + high) // 2
if alist[mid] == 0:
high = mid
else:
low = mid + 1
return alist[:low]
_candidates = [n for n, o in locals().copy().items() \
if n.startswith('with_') and callable(o)]
def run_test(repeat=1000, list_size='100', xcount='None'):
global _candidate
print """
params :
* repeat : %s
* list_size : %s
* xcounts : %s
""" % (repeat, list_size, xcount)
results = {}
for n in _candidates:
stm, stp = ('%s(get_list(%s, %s))' % (n, list_size, xcount),
'from __main__ import %s, get_list' % n)
results[n] = Timer(stm, stp).timeit(repeat)
sorted_results = sorted([(time, (n, time)) \
for n, time in results.items()])
for _, result in sorted_results:
print "%s : %s" % result
def main(args):
try:
repeat = int(args.pop(0))
except:
repeat = 1000
try:
list_size = args.pop(0)
except:
list_size = '100'
try:
xcount = args.pop(0)
except:
xcount = 'None' # -> random
run_test(repeat, list_size, xcount)
if __name__ == '__main__':
import sys
main(sys.argv[1:])
HTH