diff --git a/README.md b/README.md
index 80943c0de3067bc3417f6e79b1603ac736c6f100..b55da78b63da83497817fe8c8eb81bf68ba877b6 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,10 @@ The second section should use the bootstrap data.json and the topic above, to en
 
 The third section is the actual data section. 
 
+## dataset guide
+
+There should not be `，` or `。` in `famous` before `$prefix`.
+
 ----
 
 # 狗屁不通文章生成器
diff --git a/bullshit.py b/bullshit.py
index 7f4f15bbfa4d1766defa0e4fd8e0cd1657dfe179..0a00c6265873fe20005118fe57b72d378d9f7238 100644
--- a/bullshit.py
+++ b/bullshit.py
@@ -18,6 +18,11 @@ prefix_data   = list(data['prefixes' ]) # åœ¨famous_dataå‰é¢å¼„ç‚¹nonsense_dat
 postfix_data  = list(data['postfixes']) # åœ¨famous_dataåŽé¢å¼„ç‚¹nonsense_data
 nonsense_data = list(data['shits'    ]) # ä»£è¡¨æ–‡ç« ä¸»è¦nonsense_dataæ¥æº
 
+famous_bits_count = 6
+prefix_bits_count = 2
+postfix_bits_count = 3
+nonsense_bits_count = 5
+
 print("debug: len=", [len(l) for l in [famous_data, prefix_data, postfix_data, nonsense_data]])
 
 repeat_factor = 2
@@ -55,8 +60,12 @@ def decode(text):
         if paragraph == '':
             continue
 
+        # 1. famous-prefix reorder. 
+
+        # 2. element match and decode bits.
 
-def encode(text, topic, data)
+
+def encode(text, topic, data):
     result = '    '
     curr_paragraph = ''
     curr_data_offset = 0
@@ -65,8 +74,16 @@ def encode(text, topic, data)
             result += curr_paragraph + paragraph_tail()
             curr_paragraph = ''
         elif randint(0,100) < 20 :
-            curr_paragraph += new_famous()
+            # add a famous
+            _index0 = slice_bits(data, curr_data_offset, prefix_bits_count)
+            curr_data_offset += prefix_bits_count
+            _index1 = slice_bits(data, curr_data_offset, famous_bits_count)
+            curr_data_offset += famous_bits_count
+            _index2 = slice_bits(data, curr_data_offset, postfix_bits_count)
+            curr_data_offset += postfix_bits_count
+
+            curr_paragraph += new_famous(famous_data[_index1], prefix_data[_index0], postfix_data[_index2])
         else:
-            curr_paragraph += next(nonsense_generator)
+            # TODO
     result = result.replace("$topic",topic)
     print(result)