Explorar el Código

Fix str_get_all_between yielding half-overlapping matches

master
JustAnotherArchivist hace 3 años
padre
commit
a4cf1a4225
Se ha modificado 1 fichero con 5 adiciones y 4 borrados
  1. +5
    -4
      qwarc/utils.py

+ 5
- 4
qwarc/utils.py Ver fichero

@@ -91,16 +91,17 @@ def maybe_str_get_between(x, a, b):




def str_get_all_between(aStr, a, b):
    '''Generator yielding every string between an occurrence of a in aStr and the following occurrence of b.

    Matches never overlap: once a match closed by b has been yielded, occurrences of a
    that start before the end of that b are skipped. For example,
    str_get_all_between('aabc', 'a', 'c') yields only 'ab', not 'ab' and 'b'.
    '''

    # Offset just past the b that closed the previous match; -1 until a match has been yielded.
    prevEnd = -1
    for aOffset in find_all(aStr, a):
        if aOffset < prevEnd:
            # This occurrence of a starts inside the previously yielded match; skip it.
            continue
        offset = aOffset + len(a)
        bPos = aStr.find(b, offset)
        if bPos == -1:
            # No b after this a. Like the prevEnd check above, this relies on find_all
            # yielding offsets in ascending order: every later a would search an even
            # shorter tail of aStr, so nothing further can match.
            break
        yield aStr[offset:bPos]
        prevEnd = bPos + len(b)




def maybe_str_get_all_between(x, a, b): def maybe_str_get_all_between(x, a, b):


Cargando…
Cancelar
Guardar