Source code for asr.align

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Calculate the word error rate (WER) of two word sequences."""


def calculate_wer(reference, hypothesis):
    """
    Calculate the word error rate (WER) with the Levenshtein distance.

    The WER is the minimum number of substitutions, insertions and
    deletions needed to turn ``reference`` into ``hypothesis``, divided by
    the number of elements in ``reference``.

    O(nm) time and O(m) space complexity, where n is the length of
    ``reference`` and m the length of ``hypothesis``. There is no limit on
    the sequence length.

    Parameters
    ----------
    reference : sequence
        The expected elements (e.g. a list of words). Must support
        ``len()`` and iteration; elements are compared with ``==``.
    hypothesis : sequence
        The recognized elements, same requirements as ``reference``.

    Returns
    -------
    float
        Edit distance divided by ``len(reference)``. For an empty
        reference the rate is undefined; this function then returns
        ``0.0`` if the hypothesis is empty as well and ``inf`` otherwise.

    >>> round(calculate_wer("who is there".split(), "is there".split()), 4)
    0.3333
    >>> calculate_wer("who is there".split(), "".split())
    1.0
    >>> calculate_wer("".split(), "who is there".split())
    inf
    >>> calculate_wer("".split(), "".split())
    0.0
    """
    ref_len = len(reference)
    hyp_len = len(hypothesis)

    # The normalisation below divides by the reference length, so the
    # empty reference has to be answered before any arithmetic happens.
    if ref_len == 0:
        return 0.0 if hyp_len == 0 else float("inf")

    # Only two rows of the distance table are ever needed.
    # previous_row[j] is the distance between the reference prefix handled
    # so far and the first j elements of the hypothesis. Plain Python ints
    # are used so large distances cannot overflow.
    previous_row = list(range(hyp_len + 1))
    for i, ref_item in enumerate(reference, 1):
        # Column 0: deleting all i reference elements.
        current_row = [i]
        for j, hyp_item in enumerate(hypothesis, 1):
            if ref_item == hyp_item:
                # Matching elements cost nothing; take the diagonal value.
                cost = previous_row[j - 1]
            else:
                substitution = previous_row[j - 1]
                insertion = current_row[j - 1]
                deletion = previous_row[j]
                cost = 1 + min(substitution, insertion, deletion)
            current_row.append(cost)
        previous_row = current_row

    return previous_row[hyp_len] / float(ref_len)
def get_parser():
    """
    Build the command line parser of this script.

    The module docstring is used as the description of the parser.

    Returns
    -------
    argparse.ArgumentParser
        Parser with the two required string options ``-s1`` and ``-s2``,
        stored as the attributes ``s1`` and ``s2``.
    """
    from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

    parser = ArgumentParser(
        description=__doc__,
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    # Both sequences are mandatory: an omitted option would be parsed as
    # None and the script would crash when it tries to split it.
    parser.add_argument("-s1", dest="s1", required=True, help="sequence 1")
    parser.add_argument("-s2", dest="s2", required=True, help="sequence 2")
    return parser
def main(s1, s2):
    """
    Print the word error rate of two whitespace separated strings.

    ``s1`` is split into the reference words and ``s2`` into the
    hypothesis words; the resulting rate is printed with four decimal
    places.
    """
    reference_words = s1.split()
    hypothesis_words = s2.split()
    print("%0.4f" % calculate_wer(reference_words, hypothesis_words))


if __name__ == '__main__':
    # Script entry point: read both sequences from the command line.
    cli_args = get_parser().parse_args()
    main(cli_args.s1, cli_args.s2)