等了一年,设备还是停留在 v2.11 的固件上,英文排版依然很糟糕,只能自己动手了。

写了个英文排版的脚本,提供两种排版方式。一种是按“最小破损度”进行排版:最小破损度追求的是排版的整齐性,也就是让各行行尾的空格数目尽可能相等,这样外观上就会比较平整;当然,行尾的空格也可以移到行内,使得每行刚好满足给定宽度。另一种是按“最小行数”进行排版:这种排版相当直观,就是把原文一个词一个词地放入一行中,当不能再放入时,就进行换行。

源码:

#!/usr/bin/env python
#encoding=utf8
# Minimum-raggedness line breaking: pick the line breaks that minimise the
# sum, over all lines of a paragraph, of the squared number of unused columns.
import sys


def wrap_min_raggedness(words, linewidth):
    """Break *words* into lines of at most *linewidth* columns with minimum raggedness.

    The cost of a line is ``slack ** 2`` where ``slack`` is the number of
    columns left unused once the words are joined with single spaces.  The
    total cost is the sum over every line (the last line included), and the
    layout with the smallest total cost is returned.

    Args:
        words: sequence of non-empty strings (e.g. the result of ``str.split``).
        linewidth: maximum number of columns per line.

    Returns:
        A list of strings, one per output line, without trailing newlines.
        An empty *words* yields an empty list.

    A word that is longer than *linewidth* cannot fit anywhere; it is placed
    alone on its own (overflowing) line at no cost, so the rest of the
    paragraph is still wrapped normally.
    """
    count = len(words)
    if count == 0:
        return []

    # best[e]  : minimum total cost of laying out words[:e]
    # first[e] : index of the first word of the last line in that layout
    best = [0] * (count + 1)
    first = [0] * (count + 1)

    for end in range(1, count + 1):
        best_cost = None
        best_start = end - 1
        # Columns used by words[start:end] joined with single spaces; starting
        # at -1 cancels the separator counted for the first word added.
        used = -1
        # Grow the candidate last line backwards one word at a time.  The
        # width only increases, so stop as soon as the line overflows.
        for start in range(end - 1, -1, -1):
            used += len(words[start]) + 1
            slack = linewidth - used
            if slack < 0:
                if start == end - 1:
                    # Over-long single word: give it a line of its own.
                    best_cost = best[start]
                    best_start = start
                break
            candidate = best[start] + slack * slack
            # "<=" keeps the earliest start among equally good candidates.
            if best_cost is None or candidate <= best_cost:
                best_cost = candidate
                best_start = start
        best[end] = best_cost
        first[end] = best_start

    # Walk the recorded break points from the end back to the beginning.
    lines = []
    end = count
    while end > 0:
        start = first[end]
        lines.append(" ".join(words[start:end]))
        end = start
    lines.reverse()
    return lines


def main(argv=None):
    """Command-line entry point: ``./wordwrap width < content``.

    Every input line is treated as one paragraph and wrapped independently.
    Input lines that contain no words produce no output.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        sys.stdout.write("usage: ./wordwrap width 1<content\n")
        sys.exit(-1)
    linewidth = int(argv[1])
    for content in sys.stdin:
        words = content.split()
        if not words:
            continue
        for line in wrap_min_raggedness(words, linewidth):
            sys.stdout.write(line + "\n")


if __name__ == '__main__':
    main()

上述代码如果改用自底向上的动态规划,应该能够进一步提升效率。现在只是用了记忆化搜索,仔细想来,调用函数的次数还是会比动态规划多得多,而且函数调用的代价也比较高。另外一个问题是无法直接对全文进行排版:估计是效率太低,程序一直没有输出。我的做法是先把全文按段落切开,再逐段排版,因此有可能出现上下两段之间宽度不一致的情况。

#!/usr/bin/env python
#encoding=utf8
# Greedy line breaking for the minimum number of lines -- looks like greedy
# is good after all.
import sys


def wrap_greedy(words, width):
    """Pack *words* greedily into lines of at most *width* columns.

    Each word is appended to the current line, separated by one space, as
    long as the line stays within *width*; otherwise a new line is started.
    A word longer than *width* is placed alone on its own line.

    Args:
        words: sequence of strings to lay out.
        width: maximum number of columns per line.

    Returns:
        A list of strings, one per output line, without trailing newlines.
    """
    lines = []
    current = []
    used = 0  # columns occupied by the words in `current`, separators included
    for word in words:
        # The first word on a line needs no separating space.
        needed = used + 1 + len(word) if current else len(word)
        if current and needed > width:
            lines.append(" ".join(current))
            current = [word]
            used = len(word)
        else:
            current.append(word)
            used = needed
    if current:
        lines.append(" ".join(current))
    return lines


def print_lines(content, width=None):
    """Write *content* to stdout, re-wrapped greedily.

    All whitespace in *content* (newlines included) is treated as a word
    separator, so the whole text is wrapped as a single paragraph.  No
    newline is written after the last line.

    Args:
        content: the text to wrap.
        width: maximum line width; defaults to the module-level ``linewidth``.
    """
    if width is None:
        width = linewidth
    sys.stdout.write("\n".join(wrap_greedy(content.split(), width)))


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stdout.write("usage: ./wordwrap width 1<content\n")
        sys.exit(-1)
    linewidth = int(sys.argv[1])
    content = sys.stdin.read()
    print_lines(content)
    sys.stdout.write("\n\n")