J
Jaiprakash Singh
Hey, I am working on scraping a site, so I am using a multi-threading/multi-processing approach.
I wrote code based on a thread-safe queue, but my program still blocks (hangs) after some time. Please help — I have searched a lot but am unable to resolve it, and I am stuck here.
my code is under ..
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import subprocess
import multiprocessing
import logging
from scrapy import cmdline
import time
# Configure root logging once at import time; DEBUG level so worker
# activity is visible while diagnosing the hang.
logging.basicConfig(level=logging.DEBUG,
format='[%(levelname)s] (%(threadName)-10s) %(message)s',)
# Number of worker *processes* to spawn (name says "threads" but main2
# creates multiprocessing.Process workers). NOTE(review): 150 concurrent
# scrapy runs is very aggressive — confirm the target host and this
# machine can sustain it.
num_fetch_threads = 150
# Work queue shared between the parent and the worker processes.
# JoinableQueue so the parent can block on .join() until every item has
# been matched by a .task_done() call in a worker.
enclosure_queue = multiprocessing.JoinableQueue()
def main3(i, q):
for pth in iter(q.get, None):
try:
cmdline.execute(['scrapy', 'runspider', 'page3_second_scrapy_flipkart.py', '-a', 'pth=%s' %(pth)])
print pth
except:
pass
time.sleep(i + 2)
q.task_done()
q.task_done()
def main2(output):
procs = []
for i in range(num_fetch_threads):
procs.append(multiprocessing.Process(target=main3, args=(i, enclosure_queue,)))
#worker.setDaemon(True)
procs[-1].start()
for pth in output:
enclosure_queue.put(pth)
print '*** Main thread waiting'
enclosure_queue.join()
print '*** Done'
for p in procs:
enclosure_queue.put(None)
enclosure_queue.join()
for p in procs:
p.join()
I wrote code based on a thread-safe queue, but my program still blocks (hangs) after some time. Please help — I have searched a lot but am unable to resolve it, and I am stuck here.
my code is under ..
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import subprocess
import multiprocessing
import logging
from scrapy import cmdline
import time
# Configure root logging once at import time; DEBUG level so worker
# activity is visible while diagnosing the hang.
logging.basicConfig(level=logging.DEBUG,
format='[%(levelname)s] (%(threadName)-10s) %(message)s',)
# Number of worker *processes* to spawn (name says "threads" but main2
# creates multiprocessing.Process workers). NOTE(review): 150 concurrent
# scrapy runs is very aggressive — confirm the target host and this
# machine can sustain it.
num_fetch_threads = 150
# Work queue shared between the parent and the worker processes.
# JoinableQueue so the parent can block on .join() until every item has
# been matched by a .task_done() call in a worker.
enclosure_queue = multiprocessing.JoinableQueue()
def main3(i, q):
for pth in iter(q.get, None):
try:
cmdline.execute(['scrapy', 'runspider', 'page3_second_scrapy_flipkart.py', '-a', 'pth=%s' %(pth)])
print pth
except:
pass
time.sleep(i + 2)
q.task_done()
q.task_done()
def main2(output):
procs = []
for i in range(num_fetch_threads):
procs.append(multiprocessing.Process(target=main3, args=(i, enclosure_queue,)))
#worker.setDaemon(True)
procs[-1].start()
for pth in output:
enclosure_queue.put(pth)
print '*** Main thread waiting'
enclosure_queue.join()
print '*** Done'
for p in procs:
enclosure_queue.put(None)
enclosure_queue.join()
for p in procs:
p.join()