R
Rüdiger Werner
Hello!
Out of curiosity and to learn a little bit about the numpy package i've
tryed to implement
a vectorised version of the 'Sieve of Zakiya'.
While the code itself works fine it is astounding for me that the numpy
Version is almost 7 times slower than
the pure python version. I tryed to find out if i am doing something wrong
but wasn't able to find any answer.
Some experimentation showed that indexing into an numpy array is the
bottleneck that slows down my code.
However i don't understand why.
So i am looking for any explaination why the numpy version is that slow
(i expected it to be at least as fast as the pure python version).
Thank you in advance
1000005 Erat: 0.219000 ZakijaP5: 0.219000 VZakijaP5: 1.578000
2000005 Erat: 0.468000 ZakijaP5: 0.469000 VZakijaP5: 3.297000
3000005 Erat: 0.719000 ZakijaP5: 0.703000 VZakijaP5: 5.000000
4000005 Erat: 0.937000 ZakijaP5: 0.985000 VZakijaP5: 6.734000
5000005 Erat: 1.219000 ZakijaP5: 1.218000 VZakijaP5: 8.172000
6000005 Erat: 1.500000 ZakijaP5: 1.531000 VZakijaP5: 9.829000
7000005 Erat: 1.781000 ZakijaP5: 1.813000 VZakijaP5: 11.500000
8000005 Erat: 2.047000 ZakijaP5: 2.094000 VZakijaP5: 13.187000
9000005 Erat: 2.312000 ZakijaP5: 2.391000 VZakijaP5: 14.891000
10000005 Erat: 2.578000 ZakijaP5: 2.672000 VZakijaP5: 16.641000
11000005 Erat: 2.891000 ZakijaP5: 2.953000 VZakijaP5: 18.203000
12000005 Erat: 3.110000 ZakijaP5: 3.297000 VZakijaP5: 19.937000
#------------------------------
Prime_Sieves.py--------------------------------------
from math import sqrt
def sieveOfErat(end):
if end < 2: return []
lng = (end-1)>>1
sieve = [True]*(lng+1)
for i in xrange(int(sqrt(end)) >> 1):
if not sieve: continue
for j in xrange( (i*(i + 3) << 1) + 3, lng, (i << 1) + 3):
sieve[j] = False
primes = [2]
primes.extend([(i << 1) + 3 for i in xrange(lng) if sieve])
return primes
def SoZP5(val):
if val < 3 : return [2]
if val < 5 : return [2,3]
lndx = (val-1)/2
sieve = [0]*(lndx+15)
sieve[0:14] = 0 , 1 , 2, 0, 4, 5, 0, 7, 8, 0, 10, 0, 0, 13
for _i in xrange(14, lndx , 15):
sieve[_i:_i+15] = _i -1, 0, 0, _i + 2 , 0, _i + 4, _i + 5, 0, _i +7,
_i + 8, 0, _i + 10, 0, 0, _i + 13
for i in xrange(2, (int(sqrt(val))-3/2)+1):
if not sieve: continue
k = 30*i+45
p1 = 7*i+9 # 7 (2i+3)
p2 = 11*i+15 # 11 (2i+3)
p3 = 13*i+18 # 13 (2i+3)
p4 = 17*i+24 # 17 (2i+3)
p5 = 19*i+27 # 19 (2i+3)
p6 = 23*i+33 # 23 (2i+3)
p7 = 29*i+42 # 29 (2i+3)
p8 = 31*i+45 # 31 (2i+3)
for _i in xrange(0, lndx, k):
try:
sieve[_i+p1] = 0
sieve[_i+p2] = 0
sieve[_i+p3] = 0
sieve[_i+p4] = 0
sieve[_i+p5] = 0
sieve[_i+p6] = 0
sieve[_i+p7] = 0
sieve[_i+p8] = 0
except (IndexError, ):
break
primes = [(i*2)+3 for i in xrange(2, lndx) if sieve]
primes[0:0] = [2,3,5]
return primes
from numpy import array, zeros
def VSoZP5(val):
""" Vectorised Version of Sieve of Zakia"""
if val < 3 : return [2]
if val < 5 : return [2,3]
lndx = (val-1)/2
sieve = zeros(lndx+15,dtype="int32") #<-- Offending line: Indexing a
numpy array is slow
sieve[0:14] = 0 , 1 , 2, 0, 4, 5, 0, 7, 8, 0, 10, 0, 0, 13
for _i in xrange(14, lndx , 15):
sieve[_i:_i+15] = _i -1, 0, 0, _i + 2 , 0, _i + 4, _i + 5, 0, _i +7,
_i + 8, 0, _i + 10, 0, 0, _i + 13
om = array([9, 15, 18, 24, 27, 33, 42, 45], dtype="int32")
bm = array([7, 11, 13, 17, 19, 23, 29, 31], dtype="int32")
for i in xrange(2, (int(sqrt(val))-3/2)+1):
if not sieve: continue
k = 30*i+45
rm = (bm * i) + om
for _i in xrange(0, lndx/k + 1):
try:
sieve[rm] = 0 #<-- Offending line: Indexing a numpy array
is slow
rm += k
except (IndexError,):
break
#
primes = [(i*2)+3 for i in xrange(2, lndx) if sieve]
primes[0:0] = [2,3,5]
return primes
if __name__ == '__main__':
import time
for j in range(5, 20000007, 1000000):
print j,
a = time.time()
soe = sieveOfErat(j)
print "Erat: %2f" % (time.time() -a,),
a = time.time()
soz5 = SoZP5(j)
print "ZakijaP5: %2f" % (time.time() -a),
a = time.time()
sovz5 = VSoZP5(j)
print "VZakijaP5: %2f" % (time.time() -a)
assert soe == soz5 == sovz5
#------------------------------
Prime_Sieves.py--------------------------------------
Out of curiosity and to learn a little bit about the numpy package i've
tryed to implement
a vectorised version of the 'Sieve of Zakiya'.
While the code itself works fine it is astounding for me that the numpy
Version is almost 7 times slower than
the pure python version. I tryed to find out if i am doing something wrong
but wasn't able to find any answer.
Some experimentation showed that indexing into an numpy array is the
bottleneck that slows down my code.
However i don't understand why.
So i am looking for any explaination why the numpy version is that slow
(i expected it to be at least as fast as the pure python version).
Thank you in advance
5 Erat: 0.000000 ZakijaP5: 0.000000 VZakijaP5: 0.000000pythonw -u "PrimeSieve.py"
1000005 Erat: 0.219000 ZakijaP5: 0.219000 VZakijaP5: 1.578000
2000005 Erat: 0.468000 ZakijaP5: 0.469000 VZakijaP5: 3.297000
3000005 Erat: 0.719000 ZakijaP5: 0.703000 VZakijaP5: 5.000000
4000005 Erat: 0.937000 ZakijaP5: 0.985000 VZakijaP5: 6.734000
5000005 Erat: 1.219000 ZakijaP5: 1.218000 VZakijaP5: 8.172000
6000005 Erat: 1.500000 ZakijaP5: 1.531000 VZakijaP5: 9.829000
7000005 Erat: 1.781000 ZakijaP5: 1.813000 VZakijaP5: 11.500000
8000005 Erat: 2.047000 ZakijaP5: 2.094000 VZakijaP5: 13.187000
9000005 Erat: 2.312000 ZakijaP5: 2.391000 VZakijaP5: 14.891000
10000005 Erat: 2.578000 ZakijaP5: 2.672000 VZakijaP5: 16.641000
11000005 Erat: 2.891000 ZakijaP5: 2.953000 VZakijaP5: 18.203000
12000005 Erat: 3.110000 ZakijaP5: 3.297000 VZakijaP5: 19.937000
#------------------------------
Prime_Sieves.py--------------------------------------
from math import sqrt
def sieveOfErat(end):
if end < 2: return []
lng = (end-1)>>1
sieve = [True]*(lng+1)
for i in xrange(int(sqrt(end)) >> 1):
if not sieve: continue
for j in xrange( (i*(i + 3) << 1) + 3, lng, (i << 1) + 3):
sieve[j] = False
primes = [2]
primes.extend([(i << 1) + 3 for i in xrange(lng) if sieve])
return primes
def SoZP5(val):
if val < 3 : return [2]
if val < 5 : return [2,3]
lndx = (val-1)/2
sieve = [0]*(lndx+15)
sieve[0:14] = 0 , 1 , 2, 0, 4, 5, 0, 7, 8, 0, 10, 0, 0, 13
for _i in xrange(14, lndx , 15):
sieve[_i:_i+15] = _i -1, 0, 0, _i + 2 , 0, _i + 4, _i + 5, 0, _i +7,
_i + 8, 0, _i + 10, 0, 0, _i + 13
for i in xrange(2, (int(sqrt(val))-3/2)+1):
if not sieve: continue
k = 30*i+45
p1 = 7*i+9 # 7 (2i+3)
p2 = 11*i+15 # 11 (2i+3)
p3 = 13*i+18 # 13 (2i+3)
p4 = 17*i+24 # 17 (2i+3)
p5 = 19*i+27 # 19 (2i+3)
p6 = 23*i+33 # 23 (2i+3)
p7 = 29*i+42 # 29 (2i+3)
p8 = 31*i+45 # 31 (2i+3)
for _i in xrange(0, lndx, k):
try:
sieve[_i+p1] = 0
sieve[_i+p2] = 0
sieve[_i+p3] = 0
sieve[_i+p4] = 0
sieve[_i+p5] = 0
sieve[_i+p6] = 0
sieve[_i+p7] = 0
sieve[_i+p8] = 0
except (IndexError, ):
break
primes = [(i*2)+3 for i in xrange(2, lndx) if sieve]
primes[0:0] = [2,3,5]
return primes
from numpy import array, zeros
def VSoZP5(val):
""" Vectorised Version of Sieve of Zakia"""
if val < 3 : return [2]
if val < 5 : return [2,3]
lndx = (val-1)/2
sieve = zeros(lndx+15,dtype="int32") #<-- Offending line: Indexing a
numpy array is slow
sieve[0:14] = 0 , 1 , 2, 0, 4, 5, 0, 7, 8, 0, 10, 0, 0, 13
for _i in xrange(14, lndx , 15):
sieve[_i:_i+15] = _i -1, 0, 0, _i + 2 , 0, _i + 4, _i + 5, 0, _i +7,
_i + 8, 0, _i + 10, 0, 0, _i + 13
om = array([9, 15, 18, 24, 27, 33, 42, 45], dtype="int32")
bm = array([7, 11, 13, 17, 19, 23, 29, 31], dtype="int32")
for i in xrange(2, (int(sqrt(val))-3/2)+1):
if not sieve: continue
k = 30*i+45
rm = (bm * i) + om
for _i in xrange(0, lndx/k + 1):
try:
sieve[rm] = 0 #<-- Offending line: Indexing a numpy array
is slow
rm += k
except (IndexError,):
break
#
primes = [(i*2)+3 for i in xrange(2, lndx) if sieve]
primes[0:0] = [2,3,5]
return primes
if __name__ == '__main__':
import time
for j in range(5, 20000007, 1000000):
print j,
a = time.time()
soe = sieveOfErat(j)
print "Erat: %2f" % (time.time() -a,),
a = time.time()
soz5 = SoZP5(j)
print "ZakijaP5: %2f" % (time.time() -a),
a = time.time()
sovz5 = VSoZP5(j)
print "VZakijaP5: %2f" % (time.time() -a)
assert soe == soz5 == sovz5
#------------------------------
Prime_Sieves.py--------------------------------------