Why is this slower?

J

Joseph Reagle

I would think the commented code would be faster (fewer loops), but it is
not (because of function calls).


#Average user_time = 5.9975 over 4 iterations
inSRC = set([bio.name for bio in bios.values()])
inEB = set([bio.name for bio in bios.values() if bio.eb_title])
inWP = set([bio.name for bio in bios.values() if bio.wp_title])
inBoth = inEB & inWP
missingEB = inSRC - inEB
missingWP = inSRC - inWP
missingBoth = missingEB & missingWP
avg_size_ratio = find_average(
[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
mdn_size_ratio = find_median(
[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
SRCfem = set([bio.name for bio in bios.values() if bio.gender
== 'female'])
EBfem = set([bio.name for bio in bios.values() if bio.eb_gender
== 'female'])
WPfem = set([bio.name for bio in bios.values() if bio.wp_gender
== 'female'])
SRCmale = set([bio.name for bio in bios.values() if bio.gender
== 'male'])
EBmale = set([bio.name for bio in bios.values() if bio.eb_gender
== 'male'])
WPmale = set([bio.name for bio in bios.values() if bio.wp_gender
== 'male'])
SRCun = set([bio.name for bio in bios.values() if bio.gender
== 'unknown'])
EBun = set([bio.name for bio in bios.values() if bio.eb_gender
== 'unknown'])
WPun = set([bio.name for bio in bios.values() if bio.wp_gender
== 'unknown'])

##Average user_time = 6.0025 over 4 iterations
#def set_amend(obj, bio):
#if obj == None:
#obj = set([])
#obj.add(bio.name)
#return obj

#inSRC = set([])
#inSRC = set([])
#inEB = set([])
#inWP = set([])
#SRCfem = set([])
#EBfem = set([])
#WPfem = set([])
#SRCmale = set([])
#EBmale = set([])
#WPmale = set([])
#SRCun = set([])
#EBun = set([])
#WPun = set([])

#for bio in bios.values():
### use a function that takes set name (creates one) and conditional
#inSRC = set_amend(inSRC, bio)
#if bio.eb_title: inEB = set_amend(inEB, bio)
#if bio.wp_title: inWP = set_amend(inWP, bio)
#if bio.gender == 'female': SRCfem = set_amend(SRCfem, bio)
#if bio.eb_gender == 'female': EBfem = set_amend(EBfem, bio)
#if bio.wp_gender == 'female': WPfem = set_amend(WPfem,bio)
#if bio.gender == 'male': SRCmale = set_amend(SRCmale, bio)
#if bio.eb_gender == 'male': EBmale = set_amend(EBmale, bio)
#if bio.wp_gender == 'male': WPmale = set_amend(WPmale, bio)
#if bio.gender == 'unknown': SRCun = set_amend(SRCun, bio)
#if bio.eb_gender == 'unknown': EBun = set_amend(EBun, bio)
#if bio.wp_gender == 'unknown': WPun = set_amend(WPun, bio)
#inBoth = inEB & inWP
#missingEB = inSRC - inEB
#missingWP = inSRC - inWP
#missingBoth = missingEB & missingWP
#avg_size_ratio = find_average(
#[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
#mdn_size_ratio = find_median(
#[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
 
N

nn

I would think the commented code would be faster (fewer loops), but it is
not (because of function calls).

    #Average user_time = 5.9975 over 4 iterations
    inSRC = set([bio.name for bio in bios.values()])
    inEB = set([bio.name for bio in bios.values() if bio.eb_title])
    inWP = set([bio.name for bio in bios.values() if bio.wp_title])
    inBoth = inEB & inWP
    missingEB = inSRC - inEB
    missingWP = inSRC - inWP
    missingBoth = missingEB & missingWP
    avg_size_ratio = find_average(
        [bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
    mdn_size_ratio = find_median(
        [bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
    SRCfem = set([bio.name for bio in bios.values() if bio.gender
== 'female'])
    EBfem = set([bio.name for bio in bios.values() if bio.eb_gender
== 'female'])
    WPfem = set([bio.name for bio in bios.values() if bio.wp_gender
== 'female'])
    SRCmale = set([bio.name for bio in bios.values() if bio.gender
== 'male'])
    EBmale = set([bio.name for bio in bios.values() if bio.eb_gender
== 'male'])
    WPmale = set([bio.name for bio in bios.values() if bio.wp_gender
== 'male'])
    SRCun = set([bio.name for bio in bios.values() if bio.gender
== 'unknown'])
    EBun = set([bio.name for bio in bios.values() if bio.eb_gender
== 'unknown'])
    WPun = set([bio.name for bio in bios.values() if bio.wp_gender
== 'unknown'])

    ##Average user_time = 6.0025 over 4 iterations
    #def set_amend(obj, bio):
        #if obj == None:
            #obj = set([])
        #obj.add(bio.name)
        #return obj

    #inSRC = set([])
    #inSRC = set([])
    #inEB = set([])
    #inWP = set([])
    #SRCfem = set([])
    #EBfem = set([])
    #WPfem = set([])
    #SRCmale = set([])
    #EBmale = set([])
    #WPmale = set([])
    #SRCun = set([])
    #EBun = set([])
    #WPun = set([])

    #for bio in bios.values():
        ### use a function that takes set name (creates one) and conditional
        #inSRC = set_amend(inSRC, bio)
        #if bio.eb_title: inEB = set_amend(inEB, bio)
        #if bio.wp_title: inWP = set_amend(inWP, bio)
        #if bio.gender == 'female': SRCfem = set_amend(SRCfem, bio)
        #if bio.eb_gender == 'female': EBfem = set_amend(EBfem, bio)
        #if bio.wp_gender == 'female': WPfem = set_amend(WPfem,bio)
        #if bio.gender == 'male': SRCmale = set_amend(SRCmale, bio)
        #if bio.eb_gender == 'male': EBmale = set_amend(EBmale, bio)
        #if bio.wp_gender == 'male': WPmale = set_amend(WPmale, bio)
        #if bio.gender == 'unknown': SRCun = set_amend(SRCun, bio)
        #if bio.eb_gender == 'unknown': EBun = set_amend(EBun, bio)
        #if bio.wp_gender == 'unknown': WPun = set_amend(WPun, bio)
    #inBoth = inEB & inWP
    #missingEB = inSRC - inEB
    #missingWP = inSRC - inWP
    #missingBoth = missingEB & missingWP
    #avg_size_ratio = find_average(
        #[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])
    #mdn_size_ratio = find_median(
        #[bio.wp_ratio for bio in bios.values() if bio.wp_wc and bio.eb_wc])

Not only are you doing many function calls but you are assigning 12
objects each time. Why not do this?

for bio in bios.values():
    inSRC.add(bio)
    if bio.eb_title: inEB.add(bio)
    if bio.wp_title: inWP.add(bio)
    if bio.gender == 'female': SRCfem.add(bio)
    if bio.eb_gender == 'female': EBfem.add(bio)
    if bio.wp_gender == 'female': WPfem.add(bio)
    if bio.gender == 'male': SRCmale.add(bio)
    if bio.eb_gender == 'male': EBmale.add(bio)
    if bio.wp_gender == 'male': WPmale.add(bio)
    if bio.gender == 'unknown': SRCun.add(bio)
    if bio.eb_gender == 'unknown': EBun.add(bio)
    if bio.wp_gender == 'unknown': WPun.add(bio)
 
S

Stefan Behnel

Joseph said:
I would think the commented code would be faster (fewer loops), but it is
not (because of function calls).

You just answered your own question.

inSRC = set([bio.name for bio in bios.values()])

Note that this is actually very inefficient as it first creates a list of
values, then iterates over them to create a new list, and then creates a
set from that result. Py3 has set comprehensions, but in Py2.5+, you might
want to try .itervalues() and a generator expression instead.

Stefan
 
J

Joseph Reagle

nn said:
Not only are you doing many function calls but you are assigning 12
objects each time. Why not do this?

for bio in bios.values():
    inSRC.add(bio)

That obviously makes sense, but I was trying to get away from the verbosity
of:

inSRC = set([])
inSRC = set([])
inEB = set([])
inWP = set([])
SRCfem = set([])
EBfem = set([])
WPfem = set([])
SRCmale = set([])
EBmale = set([])
WPmale = set([])
SRCun = set([])
EBun = set([])
WPun = set([])
 
P

Paul Rubin

Joseph Reagle said:
inSRC = set([bio.name for bio in bios.values()])

You should use:

inSRC = set(bio.name for bio in bios.values())

without the square brackets. That avoids creating an intermediate list.
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
474,189
Messages
2,571,016
Members
47,616
Latest member
gijoji4272

Latest Threads

Top