Skip to content Skip to sidebar Skip to footer

Python3, Difflib Sequencematcher

The following takes in two strings, compares them, and returns both their identical parts and their differences, separated by spaces (maintaining the length of the longest string).

Solution 1:

I have worked on resolving this, and since no one has posted a response I will post my progress and the solution I ended up with. The following code is a work in progress: it worked well for variations with a small offset, but began to break with larger differences, specifically in maintaining the spacing (offset) needed to line the two strings up.

from difflib import SequenceMatcher
import pdb


# Align two strings on their common runs (as found by difflib.SequenceMatcher)
# and print both with every differing stretch replaced by underscores, padded
# to the longer of the two differing stretches so the common runs line up.
#
# NOTE: this is work-in-progress code; the author reports that alignment
# breaks down when the two inputs differ by larger amounts.

t1 = 'betty:  backstreetvboysareback"give.jpg"LAlarrygarryhannyhref="ang"_self'

t2 = 'betty:  backstreetvboysareback"lol.jpg"LAlarrygarryhannyhref="ang"_self'

# Alternative inputs / hand-written sample strings kept from earlier experiments:
#t2 = 'bettyv:  backstreetvboysareback"lifeislike"LAlarrygarryhannyhref="in.php"_selff'
#t2 = 'LA'
#t2 = 'c give.'
#t2 = 'give.'
#t1 = 'betty :  backstreetvboysareback" i e      "LAlarrygarryhannyhref=" n    "_self'
#t2 = 'betty :  backstreetvboysareback" i e      "LAlarrygarryhannyhref=" n    "_self'
#o1 = '                                g v .jpg                          g           '
#o2 = '     v                          l f islike                        i .php      '


def _fill_lengths(entries):
    """Fill in slot 3 (gap length) and slot 4 (running total) of each entry.

    Each entry is a list ``[text, start, end, length, running_total]``; the
    length is ``end - start`` and the running total is the sum of the lengths
    of this entry and all entries before it. Entries are modified in place.
    """
    running = 0
    for entry in entries:
        length = entry[2] - entry[1]
        entry[3] = length
        running += length
        entry[4] = running


def _render(label, text, own, other):
    """Return *text* with each gap replaced by a run of underscores.

    ``own`` holds the gap entries of *text*, ``other`` the gap entries of the
    string it is compared with (same number of entries, pairwise matching).
    Each gap is replaced by as many underscores as the longer of the two
    paired gaps. After every gap the partial result is printed, prefixed with
    *label*.

    Returns the fully rendered string; when there are no gap entries at all
    the text is returned unchanged and nothing is printed.
    """
    if not own:
        return text

    # Text before the first gap is copied as-is.
    out = text[:own[0][1]]
    for idx, (mine, theirs) in enumerate(zip(own, other)):
        pad = max(mine[3], theirs[3])
        # The common run after this gap ends where the next gap starts, or at
        # the end of the text for the last gap.
        following = own[idx + 1][1] if idx + 1 < len(own) else len(text)
        out += '_' * pad + text[mine[2]:following]
        print(label + out)
    return out


matcher = SequenceMatcher(None, t1, t2)
blocks = matcher.get_matching_blocks()

#print(len(blocks))

# One entry per differing stretch ("gap") of t1 / t2 respectively:
# [text, start, end, length (end - start), running total of lengths]
bla1 = []
bla2 = []

# A gap before the first matching block exists when either string has
# unmatched text at its start.
first = blocks[0]
if first.a != 0 or first.b != 0:
    bla1.append([t1[:first.a], 0, first.a, 0, 0])
    bla2.append([t2[:first.b], 0, first.b, 0, 0])

# Every pair of consecutive matching blocks encloses one gap in each string;
# the final block returned by get_matching_blocks() is a zero-size sentinel.
for current, following in zip(blocks, blocks[1:]):
    print(current)

    a_end = current.a + current.size
    b_end = current.b + current.size
    bla1.append([t1[a_end:following.a], a_end, following.a, 0, 0])
    bla2.append([t2[b_end:following.b], b_end, following.b, 0, 0])

#pdb.set_trace()

_fill_lengths(bla1)
_fill_lengths(bla2)

print(bla1)
print(bla2)

tt1 = _render('t1 = ', t1, bla1, bla2)
tt2 = _render('t2 = ', t2, bla2, bla1)
print()

Answer :

Unfortunately I have been too busy to continue coding this and have resorted to sub-processing diffutils ... this is a wonderful alternative to a lot of painstaking coding!

Post a Comment for "Python3, Difflib Sequencematcher"