A
Antoon Pardon
I have the following code I wrote.
==============================================
from difflib import SequenceMatcher
import sys
write = sys.stdout.write
warn = sys.stderr.write
def program(argv):
ls1 = open(argv[1]).readlines()
ls2 = open(argv[2]).readlines()
matcher = SequenceMatcher(ls1, ls2)
s1 = 0
s2 = 0
print ls1
print ls2
warn("*** %d %d \n" % (len(ls1), len(ls2)))
for e1, e2, lg in matcher.get_matching_blocks():
warn("*** %d %d %d\n" % (e1, e2, lg))
for i in xrange(s1, e1):
write('- ')
write(ls1)
for i in xrange(s2, e2):
write('+ ')
write(ls2)
for i in xrange(e1, e1+lg):
write(' ')
write(ls1)
s1, s2 = e1 + lg, e2 + lg
if __name__ == '__main__':
program(sys.argv)
===============================================
Now when I run it I get the following result:
python diff.py map.0 map.1
['\n', 'begin\n', ' a1\n', ' a2\n', ' a3\n', ' a4\n', ' a5\n', 'end\n', '\n', 'begin\n', ' c1\n', ' c2\n', ' c3\n', ' c4\n', ' c5\n', ' c6\n', ' c7\n', 'end\n', '\n', 'begin\n', ' e1\n', ' e2\n', ' e3\n', ' e4\n', ' e5\n', ' e6\n', ' e7\n', ' e8\n', ' e9\n', 'end\n']
['\n', 'begin\n', ' a1\n', ' a2\n', ' a3\n', ' a4\n', ' a5\n', 'end\n', '\n', 'begin\n', ' c1\n', ' c2\n', ' c3\n', ' c4\n', ' c5\n', ' c6\n', ' c7\n', 'end\n', '\n', 'begin\n', ' d1\n', ' d2\n', ' d3\n', 'end\n', '\n', 'begin\n', ' e1\n', ' e2\n', ' e3\n', ' e4\n', ' e5\n', ' e6\n', ' e7\n', ' e8\n', ' e9\n', 'end\n']
*** 30 36
*** 36 0 0
-
- begin
- a1
- a2
- a3
- a4
....
- Traceback (most recent call last):
File "diff.py", line 31, in <module>
program(sys.argv)
File "diff.py", line 21, in program
write(ls1)
IndexError: list index out of range
What I don't understand is: The first list is 30 items long and the second 36.
But the first match I get after calling get_matching_blocks says the match starts
at item 36 of the first list.
Yes I noticed it is the sepcial last match with 0 siza,e but even if that would be
correct because there would be no common items, the first number of the match
shouldn't be more than the length of the first list.
What am I doing wrong?
==============================================
from difflib import SequenceMatcher
import sys
write = sys.stdout.write
warn = sys.stderr.write
def program(argv):
ls1 = open(argv[1]).readlines()
ls2 = open(argv[2]).readlines()
matcher = SequenceMatcher(ls1, ls2)
s1 = 0
s2 = 0
print ls1
print ls2
warn("*** %d %d \n" % (len(ls1), len(ls2)))
for e1, e2, lg in matcher.get_matching_blocks():
warn("*** %d %d %d\n" % (e1, e2, lg))
for i in xrange(s1, e1):
write('- ')
write(ls1)
for i in xrange(s2, e2):
write('+ ')
write(ls2)
for i in xrange(e1, e1+lg):
write(' ')
write(ls1)
s1, s2 = e1 + lg, e2 + lg
if __name__ == '__main__':
program(sys.argv)
===============================================
Now when I run it I get the following result:
python diff.py map.0 map.1
['\n', 'begin\n', ' a1\n', ' a2\n', ' a3\n', ' a4\n', ' a5\n', 'end\n', '\n', 'begin\n', ' c1\n', ' c2\n', ' c3\n', ' c4\n', ' c5\n', ' c6\n', ' c7\n', 'end\n', '\n', 'begin\n', ' e1\n', ' e2\n', ' e3\n', ' e4\n', ' e5\n', ' e6\n', ' e7\n', ' e8\n', ' e9\n', 'end\n']
['\n', 'begin\n', ' a1\n', ' a2\n', ' a3\n', ' a4\n', ' a5\n', 'end\n', '\n', 'begin\n', ' c1\n', ' c2\n', ' c3\n', ' c4\n', ' c5\n', ' c6\n', ' c7\n', 'end\n', '\n', 'begin\n', ' d1\n', ' d2\n', ' d3\n', 'end\n', '\n', 'begin\n', ' e1\n', ' e2\n', ' e3\n', ' e4\n', ' e5\n', ' e6\n', ' e7\n', ' e8\n', ' e9\n', 'end\n']
*** 30 36
*** 36 0 0
-
- begin
- a1
- a2
- a3
- a4
....
- Traceback (most recent call last):
File "diff.py", line 31, in <module>
program(sys.argv)
File "diff.py", line 21, in program
write(ls1)
IndexError: list index out of range
What I don't understand is: The first list is 30 items long and the second 36.
But the first match I get after calling get_matching_blocks says the match starts
at item 36 of the first list.
Yes I noticed it is the sepcial last match with 0 siza,e but even if that would be
correct because there would be no common items, the first number of the match
shouldn't be more than the length of the first list.
What am I doing wrong?