in codes/data_processing.py [0:0]
def tokenizer(x, k: int) -> str:
    """Split a sequence into overlapping k-mers, each followed by a space.

    A window of width ``k`` slides over ``x`` one position at a time. Every
    window becomes one token, and every token is followed by a single space,
    so a non-empty result always ends with a trailing space.

    Args:
        x: Sequence to tokenize, typically a DNA string such as ``"ACGT"``.
            Any sliceable sequence of strings (e.g. a list of characters)
            is accepted as well.
        k: Window (k-mer) length; must be at least 1.

    Returns:
        The space-terminated k-mers, e.g. ``"AC CG GT "`` for
        ``tokenizer("ACGT", 2)``. An empty string is returned when ``x`` is
        shorter than ``k``.

    Raises:
        ValueError: If ``k`` is less than 1. A non-positive window has no
            meaningful k-mers (it previously yielded a run of bare spaces).
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Number of full windows; zero or negative when x is shorter than k,
    # in which case range() is empty and the result is "".
    n_windows = len(x) - k + 1

    # Build everything with a single join instead of repeated `+`, which is
    # quadratic in the output length. The inner join keeps non-str sequences
    # (lists/tuples of characters) working exactly like per-element
    # concatenation did.
    return "".join("".join(x[i:i + k]) + " " for i in range(n_windows))