Jay Bird schrieb:
I've been trying to figure out a simple algorithm on how to combine a
list of parts that have 1D locations that overlap into a non-
overlapping list. For example, here would be my input:
part name location
a 5-9
b 7-10
c 3-6
d 15-20
e 18-23
And here is what I need for an output:
part name location
c.a.b 3-10
d.e 15-23
Suppose you have your data given as a dictionary:
data = {'a'

5,9),
'b'

7,10),
'c'

3,6),
'd'

15,20),
'e'

18,23)}
Then the following not particularly elegant
but very simple and straight forward
algorithm will solve your problem:
def values(key):
return set(range(data[key][0], data[key][1]+1))
keys = list(data.keys())
result = []
while keys:
k = [keys[0]]
keys = keys[1:]
v = values(k[0])
for l in keys[:]:
if v.intersection(values(l)):
v.update(values(l))
k.append(l)
keys.remove(l)
result.append((k,v))
# print(result) ## if you like
for k, v in result:
print(".".join(sorted(k)), "%d-%d" % (min(v), max(v)))
Regards,
Gregor
I've tried various methods, which all fail. Does anyone have an idea
how to do this?
I was thinking sets, too.
import random
def overlap():
merge_count = 0
datam.append(data.pop(0))
while len(data)>0:
d0 = data.pop(0)
dmi = 0
dmtemp = datam[:]
while d0:
dmtest = d0[1].intersection(dmtemp[dmi][1])
#print(d0,dmtemp[dmi],dmtest)
if len(dmtest)>0: # overlap
dmtest = d0[1].union(dmtemp[dmi][1])
datam[dmi] = (d0[0]+dmtemp[dmi][0],dmtest)
d0 = None
dmi = 0
merge_count += 1
else:
dmi += 1
if dmi == len(dmtemp):
datam.append(d0)
d0 = None
return merge_count # repeat until 0 returned
## OP data
##data=[]
##data.append(('a',set([i for i in range(5,9+1)])))
##data.append(('b',set([i for i in range(7,10+1)])))
##data.append(('c',set([i for i in range(3,6+1)])))
##data.append(('d',set([i for i in range(15,20+1)])))
##data.append(('e',set([i for i in range(18,23+1)])))
##datam = []
##
##print()
##for j in data: print(j)
##print()
##
##
##overlap()
##
##for i in datam:
## print(i[0],min(i[1]),max(i[1]))
##
## ('a', {8, 9, 5, 6, 7})
## ('b', {8, 9, 10, 7})
## ('c', {3, 4, 5, 6})
## ('d', {15, 16, 17, 18, 19, 20})
## ('e', {18, 19, 20, 21, 22, 23})
##
## cba 3 10
## ed 15 23
## special case - repeat overlap test until no merges
data = [('A', {79, 80, 81, 82, 83, 84, 85, 86}),
('B', {96, 89, 90, 91, 92, 93, 94, 95}),
('C', {21, 22, 23, 24, 25, 26, 27, 28}),
('D', {64, 65, 66, 67, 68, 69, 62, 63}),
('E', {72, 73, 74, 75, 76, 77, 78, 79}),
('F', {65, 66, 67, 68, 69, 70, 71, 72}),
('G', {11, 12, 13, 14, 15, 16, 17, 18}),
('H', {24, 25, 26, 27, 28, 29, 30, 31}),
('I', {32, 33, 34, 35, 36, 37, 38, 31}),
('J', {81, 82, 83, 84, 85, 86, 87, 88})]
datam = []
## ('A', {55, 56, 57, 58, 59, 60, 61, 62})
## ('B', {64, 57, 58, 59, 60, 61, 62, 63})
## ('C', {10, 11, 12, 13, 14, 15, 16, 17})
## ('D', {32, 25, 26, 27, 28, 29, 30, 31})
## ('E', {54, 55, 56, 57, 58, 59, 60, 61})
## ('F', {64, 65, 66, 59, 60, 61, 62, 63})
## ('G', {41, 42, 43, 44, 45, 46, 47, 48})
## ('H', {67, 68, 69, 70, 71, 72, 73, 74})
## ('I', {55, 56, 57, 58, 59, 60, 61, 62})
## ('J', {64, 65, 66, 67, 68, 69, 62, 63})
##
## JIFEBA 54 69
## C 10 17
## D 25 32
## G 41 48
## H 67 74 <--- NFG overlaps JIFEBA
print()
for j in data: print(j)
print()
merges = 1 # corrects above
while merges > 0:
merges = overlap()
print(merges)
if merges>0:
data = datam[:]
datam = []
for i in datam:
print(i[0],min(i[1]),max(i[1]))
## corrected
##
## ('A', {79, 80, 81, 82, 83, 84, 85, 86})
## ('B', {96, 89, 90, 91, 92, 93, 94, 95})
## ('C', {21, 22, 23, 24, 25, 26, 27, 28})
## ('D', {64, 65, 66, 67, 68, 69, 62, 63})
## ('E', {72, 73, 74, 75, 76, 77, 78, 79})
## ('F', {65, 66, 67, 68, 69, 70, 71, 72})
## ('G', {11, 12, 13, 14, 15, 16, 17, 18})
## ('H', {24, 25, 26, 27, 28, 29, 30, 31})
## ('I', {32, 33, 34, 35, 36, 37, 38, 31})
## ('J', {81, 82, 83, 84, 85, 86, 87, 88})
##
## 5
## 1
## 0
## DJFEA 62 88
## B 89 96
## IHC 21 38
## G 11 18
# random sequences
data = []
for j in range(12):
q = random.randint(0,90)
r = q+12
data.append((chr(j+65),set([x for x in range(q,r)])))
datam = []
print()
for j in data: print(j)
print()
merges = 1 # corrects above
while merges > 0:
merges = overlap()
print(merges)
if merges>0:
data = datam[:]
datam = []
for i in datam:
print(i[0],min(i[1]),max(i[1]))
## ('A', {3, 4, 5, 6, 7, 8, 9, 10})
## ('B', {76, 77, 78, 79, 80, 81, 82, 83})
## ('C', {43, 44, 45, 46, 47, 48, 49, 50})
## ('D', {42, 43, 44, 45, 46, 47, 48, 49})
## ('E', {23, 24, 25, 26, 27, 28, 29, 30})
## ('F', {49, 50, 51, 52, 53, 54, 55, 56})
## ('G', {76, 77, 78, 79, 80, 81, 82, 83})
## ('H', {1, 2, 3, 4, 5, 6, 7, 8})
## ('I', {37, 38, 39, 40, 41, 42, 43, 44})
## ('J', {83, 84, 85, 86, 87, 88, 89, 90})
##
## 6
## 0
## HA 1 10
## JGB 76 90
## IFDC 37 56
## E 23 30
## ('A', {64, 65, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63})
## ('B', {34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45})
## ('C', {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27})
## ('D', {70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81})
## ('E', {64, 65, 66, 67, 56, 57, 58, 59, 60, 61, 62, 63})
## ('F', {34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45})
## ('G', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
## ('H', {64, 65, 66, 55, 56, 57, 58, 59, 60, 61, 62, 63})
## ('I', {44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55})
## ('J', {64, 65, 66, 67, 68, 57, 58, 59, 60, 61, 62, 63})
## ('K', {96, 97, 98, 99, 88, 89, 90, 91, 92, 93, 94, 95})
## ('L', {96, 97, 98, 99, 100, 89, 90, 91, 92, 93, 94, 95})
##
## 6
## 1
## 0
## FBJIHEA 34 68
## C 16 27
## D 70 81
## G 0 11
## LK 88 100