-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path13.8.py
More file actions
98 lines (85 loc) · 3.12 KB
/
13.8.py
File metadata and controls
98 lines (85 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#coding:utf-8
from random import *
import random
import time
import bisect
from bisect import bisect_left
from string import *
"""
Write a program that uses the algorithm described above (Markov analysis) to pick words at random from a book.
"""
# Raw strings keep the Windows backslashes literal. Without the r prefix,
# "\U" and "\u" are parsed as (malformed) unicode escapes on Python 3 and
# the file does not even compile; on Python 2 the resulting paths are
# identical to the original ones.
fin = open(r'C:\Users\LzyRapx\PycharmProjects\untitled\words.txt')
# Binary mode: text_reader() decodes the bytes as UTF-8 itself, so the
# platform's default text encoding must not get in the way.
fin2 = open(r"C:\Users\LzyRapx\PycharmProjects\untitled\kamasutra.txt", "rb")
from random import *
from string import *
def text_reader(doc):
    """
    Read an open document and return its words in text order.

    The text is lower-cased, stripped of leading/trailing whitespace and
    of every ASCII punctuation character, then split on whitespace.

    doc: open file-like object. A binary stream (and any Python 2 file)
         is decoded as UTF-8, with an optional byte-order mark removed;
         a text stream is used as-is apart from dropping a leading BOM.

    Returns: list of words as text strings, in the order they appear in
             the document (empty list for an empty document).
    """
    text = doc.read()
    if isinstance(text, bytes):
        # Decode exactly once; "utf-8-sig" also swallows a leading BOM.
        text = text.decode("utf-8-sig")
    elif text.startswith(u"\ufeff"):
        # A text stream opened as plain UTF-8 keeps the BOM as a character.
        text = text[1:]
    text = text.lower().strip()
    # Filtering character by character behaves the same on Python 2 and 3,
    # unlike the Python-2-only two-argument translate(None, punctuation).
    unwanted = frozenset(punctuation)
    return u"".join(ch for ch in text if ch not in unwanted).split()
def get_prefixes(l, prefix=2, table=None):
    """
    Record, for every run of `prefix` consecutive words, the word that
    follows it.

    l:      list of words in text order.
    prefix: number of words in each key (2 by default). Values below 1
            add nothing.
    table:  dictionary to fill. When omitted, the module-level dictionary
            `d` is updated, which is what existing two-argument calls
            rely on.

    Returns: the dictionary that was filled. Each key is a tuple of
             exactly `prefix` words and maps to the list of single-word
             suffixes seen after it, in text order (duplicates kept, so
             a random pick is weighted by frequency).
    """
    if table is None:
        # Backward compatibility: historical callers expect the global
        # dictionary to be mutated in place.
        table = d
    if prefix < 1:
        return table
    # The last `prefix` words have no successor, so they never form a key;
    # a text shorter than prefix + 1 words therefore yields no entries.
    for index in range(len(l) - prefix):
        key = tuple(l[index:index + prefix])
        # Append in place instead of rebuilding the suffix list each time.
        table.setdefault(key, []).append(l[index + prefix])
    return table
def generate_text(d, how_many_lines=5):
    """
    Print random text produced from a Markov-analysis dictionary.

    d:              dictionary mapping prefixes (tuples of words) to the
                    list of words that may follow them (suffixes).
    how_many_lines: controls the length of the output. As before,
                    how_many_lines + 1 suffix words are generated
                    (none when the value is negative).

    A prefix is picked at random and its words are emitted, one per line.
    Then a suffix is repeatedly picked at random for the current prefix
    and emitted; the prefix window slides forward by one word each time.
    When the window reaches a prefix with no recorded suffix (for
    example the last words of the source text), generation continues
    from a fresh random prefix instead of failing.

    Returns: the list of words that were printed, in order. An empty
             dictionary prints nothing and returns an empty list.
    """
    # Materialise the keys once: choice() needs a sequence, and a dict
    # view is not one on Python 3.
    prefixes = list(d)
    if not prefixes:
        return []

    start = choice(prefixes)
    words = list(start)
    for _ in range(how_many_lines + 1):
        if start not in d:
            # Dead end: nothing is known to follow this window.
            start = choice(prefixes)
        suffix = choice(d[start])
        words.append(suffix)
        # Slide the window: drop the oldest word, take in the new one.
        start = (start + (suffix,))[1:]

    for word in words:
        print(word)
    return words
# Text of the document as a list of all its words
kamasutra = text_reader(fin2)
# The whole text is in memory now, so release the handles opened at the
# top of the script (fin is not read anywhere in this script).
fin2.close()
fin.close()
# Dictionary that maps tuples of prefix words (keys) to the list of
# possible one-word suffixes (values)
d = {}
# Fill the dictionary with every prefix of the text and its suffixes.
# The prefix length is customizable and is 2 if not specified.
get_prefixes(kamasutra, 4)
# Write poem
generate_text(d, 25)