From 87d87ed6a5cb7c0b6439e26b8d0f098d156e78ad Mon Sep 17 00:00:00 2001 From: Bensong Liu <bensl@microsoft.com> Date: Fri, 30 Apr 2021 17:55:09 +0800 Subject: [PATCH] save --- README.md | 4 ++++ bullshit.py | 23 ++++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 80943c0..b55da78 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,10 @@ The second section should use the bootstrap data.json and the topic above, to en The third section is the actual data section. +## dataset guide + +There should not be `,` or `。` in `famous` before `$prefix`. + ---- # 狗屁不通文章生成器 diff --git a/bullshit.py b/bullshit.py index 7f4f15b..0a00c62 100644 --- a/bullshit.py +++ b/bullshit.py @@ -18,6 +18,11 @@ prefix_data = list(data['prefixes' ]) # 在famous_data前面弄点nonsense_dat postfix_data = list(data['postfixes']) # 在famous_data后面弄点nonsense_data nonsense_data = list(data['shits' ]) # 代表文章主要nonsense_data来源 +famous_bits_count = 6 +prefix_bits_count = 2 +postfix_bits_count = 3 +nonsense_bits_count = 5 + print("debug: len=", [len(l) for l in [famous_data, prefix_data, postfix_data, nonsense_data]]) repeat_factor = 2 @@ -55,8 +60,12 @@ def decode(text): if paragraph == '': continue + # 1. famous-prefix reorder. + + # 2. element match and decode bits. 
-def encode(text, topic, data) + +def encode(text, topic, data): result = ' ' curr_paragraph = '' curr_data_offset = 0 @@ -65,8 +74,16 @@ def encode(text, topic, data) result += curr_paragraph + paragraph_tail() curr_paragraph = '' elif randint(0,100) < 20 : - curr_paragraph += new_famous() + # add a famous + _index0 = slice_bits(data, curr_data_offset, prefix_bits_count) + curr_data_offset += prefix_bits_count + _index1 = slice_bits(data, curr_data_offset, famous_bits_count) + curr_data_offset += famous_bits_count + _index2 = slice_bits(data, curr_data_offset, postfix_bits_count) + curr_data_offset += postfix_bits_count + + curr_paragraph += new_famous(famous_data[_index1], prefix_data[_index0], postfix_data[_index2]) else: - curr_paragraph += next(nonsense_generator) + # TODO result = result.replace("$topic",topic) print(result) -- GitLab