def tokenizer()

in codes/data_processing.py [0:0]


def tokenizer(x, k: int) -> str:
    """Split a sequence into overlapping k-mers joined into one string.

    A window of width ``k`` slides over ``x`` one position at a time, so
    consecutive k-mers overlap by ``k - 1`` characters.  Every k-mer is
    followed by a single space, which means a non-empty result ends with a
    trailing space (kept for compatibility with existing callers).

    Args:
        x: The sequence to tokenize: a string, or any sliceable sequence of
            one-character strings (the docstring of the original code names
            DNA sequences as the intended input).
        k: Width of each k-mer; must be at least 1.

    Returns:
        The space-terminated k-mers concatenated in order, or an empty
        string when ``x`` is shorter than ``k``.

    Raises:
        ValueError: If ``k`` is smaller than 1.

    Example:
        >>> tokenizer("ATGCA", 3)
        'ATG TGC GCA '
    """
    if k < 1:
        # A non-positive width has no meaningful k-mers; the old loop
        # silently produced a string of bare separators in this case.
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Number of full-width windows; zero or negative when x is too short,
    # in which case range() is empty and the result is ''.
    window_count = len(x) - k + 1

    # The inner join rebuilds each window from its items so that non-str
    # sequences of characters keep working exactly as with item indexing.
    return ''.join(''.join(x[i:i + k]) + ' ' for i in range(window_count))