diff --git a/README.md b/README.md index 80943c0de3067bc3417f6e79b1603ac736c6f100..b55da78b63da83497817fe8c8eb81bf68ba877b6 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,10 @@ The second section should use the bootstrap data.json and the topic above, to en The third section is the actual data section. +## dataset guide + +There should not be `,` or `。` in `famous` before `$prefix`. + ---- # 狗屁不通文章生成器 diff --git a/bullshit.py b/bullshit.py index 7f4f15bbfa4d1766defa0e4fd8e0cd1657dfe179..0a00c6265873fe20005118fe57b72d378d9f7238 100644 --- a/bullshit.py +++ b/bullshit.py @@ -18,6 +18,11 @@ prefix_data = list(data['prefixes' ]) # 在famous_data前面弄点nonsense_dat postfix_data = list(data['postfixes']) # 在famous_data后面弄点nonsense_data nonsense_data = list(data['shits' ]) # 代表文章主要nonsense_data来源 +famous_bits_count = 6 +prefix_bits_count = 2 +postfix_bits_count = 3 +nonsense_bits_count = 5 + print("debug: len=", [len(l) for l in [famous_data, prefix_data, postfix_data, nonsense_data]]) repeat_factor = 2 @@ -55,8 +60,12 @@ def decode(text): if paragraph == '': continue + # 1. famous-prefix reorder. + + # 2. element match and decode bits. 
-def encode(text, topic, data) + +def encode(text, topic, data): result = ' ' curr_paragraph = '' curr_data_offset = 0 @@ -65,8 +74,16 @@ def encode(text, topic, data) result += curr_paragraph + paragraph_tail() curr_paragraph = '' elif randint(0,100) < 20 : - curr_paragraph += new_famous() + # add a famous + _index0 = slice_bits(data, curr_data_offset, prefix_bits_count) + curr_data_offset += prefix_bits_count + _index1 = slice_bits(data, curr_data_offset, famous_bits_count) + curr_data_offset += famous_bits_count + _index2 = slice_bits(data, curr_data_offset, postfix_bits_count) + curr_data_offset += postfix_bits_count + + curr_paragraph += new_famous(famous_data[_index1], prefix_data[_index0], postfix_data[_index2]) else: - curr_paragraph += next(nonsense_generator) + # TODO result = result.replace("$topic",topic) print(result)