台电 k6 你立功了
等了一年,固件还是停留在 v2.11,英文排版依然很渣,只能自己动手了。
写了个英文排版的脚本,有两种排版方式:一是按 最小破损度 进行排版,最小破损度追求的是文章排版的整齐性,也就是各行行尾的空格数目尽可能相等。这样外观上就会比较平整,当然,行尾的空格也可以移到行内,使得每行刚好满足给定宽度。另一种是按 最小行数 进行排版。这种排版相当直观,就是把原文一个词一个词地放入一行中,当不能再放入时,就进行换行。
源码:
#!/usr/bin/env python
#encoding=utf8
# Minimum-raggedness line wrapping.
#
# Usage: ./wordwrap width 1<content
#
# Every input line is treated as one paragraph and is re-wrapped so that the
# sum of the squared trailing-space counts of its output lines is minimal.
import sys


def break_points(len_words, linewidth):
    """Return the index of the last word of every output line.

    The layout minimises the sum over all lines (the last one included) of
    ``slack ** 2``, where ``slack`` is ``linewidth`` minus the width of the
    line's words joined by single spaces.

    A word that is wider than ``linewidth`` cannot fit anywhere, so it is
    placed alone on its own line at no penalty instead of making the whole
    paragraph unwrappable.

    Args:
        len_words: sequence with the length of each word, in order.
        linewidth: maximum number of characters per line.

    Returns:
        Ascending list of word indexes; each one ends a line.  Empty when
        ``len_words`` is empty.
    """
    count = len(len_words)
    # best[end]: minimal penalty for laying out the first `end` words.
    best = [0] * (count + 1)
    # line_start[end]: index of the first word on the last line of that
    # optimal layout.
    line_start = [0] * (count + 1)

    for end in range(1, count + 1):
        best_cost = None
        best_start = end - 1
        width = -1  # cancels the separator counted for the first word
        # Grow the last line backwards one word at a time and stop as soon
        # as it overflows: longer candidates cannot fit either.
        for start in range(end - 1, -1, -1):
            width += len_words[start] + 1
            if width > linewidth:
                if start == end - 1:
                    # Over-long single word: give it a line of its own.
                    best_cost = best[start]
                    best_start = start
                break
            slack = linewidth - width
            candidate = best[start] + slack * slack
            # "<=" while scanning backwards keeps the earliest break among
            # equally good layouts.
            if best_cost is None or candidate <= best_cost:
                best_cost = candidate
                best_start = start
        best[end] = best_cost
        line_start[end] = best_start

    # Walk the chosen line starts back from the end of the paragraph.
    ends = []
    end = count
    while end > 0:
        ends.append(end - 1)
        end = line_start[end]
    ends.reverse()
    return ends


def wrap_paragraph(words, linewidth):
    """Split ``words`` into lines of text with minimum raggedness.

    Args:
        words: list of words (strings without whitespace).
        linewidth: maximum number of characters per line.

    Returns:
        List of strings, one per output line, words joined by one space.
    """
    lines = []
    first = 0
    for last in break_points([len(word) for word in words], linewidth):
        lines.append(" ".join(words[first:last + 1]))
        first = last + 1
    return lines


def main(argv):
    """Wrap every paragraph read from stdin to the width given in ``argv``.

    Prints the usage text and exits with status -1 when the argument count
    is wrong.  Input lines that contain no words produce no output.
    """
    if len(argv) != 2:
        sys.stdout.write("usage: ./wordwrap width 1<content\n")
        sys.exit(-1)
    linewidth = int(argv[1])
    for content in sys.stdin:
        words = content.split()
        if not words:
            continue
        for line in wrap_paragraph(words, linewidth):
            sys.stdout.write(line + "\n")


if __name__ == '__main__':
    main(sys.argv)
上述代码按动态规划做应该能够进一步提升效率,现在只是用了记忆化搜索,仔细想来调用函数的次数还是会比动态规划要多得多,而且函数调用的代价还是比较高的。另外一个问题,是无法直接对全文进行排版,估计是效率太低,一直没有输出,我是分段切开再行排版的,有可能出现上下两段间宽度不一致的情况。
#!/usr/bin/env python
#encoding=utf8
# Greedy wrapping into the minimum number of lines -- greedy is good.
#
# Usage: ./wordwrap width 1<content
import sys


def wrap_greedy(words, width):
    """Pack ``words`` greedily into lines of at most ``width`` characters.

    Each word is appended to the current line, separated by one space, as
    long as it still fits; otherwise a new line is started.  A word wider
    than ``width`` is placed alone on its own line.

    Args:
        words: list of words (strings without whitespace).
        width: maximum number of characters per line.

    Returns:
        List of strings, one per output line.
    """
    lines = []
    current = []
    used = 0
    for word in words:
        if not current:
            # The first word of a line needs no separator and is always
            # accepted, so no leading space or empty line is produced.
            current.append(word)
            used = len(word)
            continue
        needed = used + 1 + len(word)
        if needed > width:
            lines.append(" ".join(current))
            current = [word]
            used = len(word)
        else:
            current.append(word)
            used = needed
    if current:
        lines.append(" ".join(current))
    return lines


def print_lines(content, width=None):
    """Write ``content`` re-wrapped greedily to stdout.

    All whitespace in ``content`` (line breaks included) separates words,
    so the whole text is reflowed as a single paragraph.  No newline is
    written after the last line.

    Args:
        content: text to wrap.
        width: maximum line width; defaults to the module-level
            ``linewidth`` set by the command-line entry point.
    """
    if width is None:
        width = linewidth
    sys.stdout.write("\n".join(wrap_greedy(content.split(), width)))


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stdout.write("usage: ./wordwrap width 1<content\n")
        sys.exit(-1)
    linewidth = int(sys.argv[1])
    content = sys.stdin.read()
    print_lines(content)
    sys.stdout.write("\n\n")
Tag:
python