-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_song.py
142 lines (133 loc) · 4.03 KB
/
generate_song.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import pandas as pd
import random
import nltk
from get_combinations import get_accentuated_variations
# Pronunciation lookup table, loaded once at import time from NLTK's cmudict
# corpus. get_pronun() queries it with lower-cased words and uses the first
# entry of whatever list is stored for the word.
# NOTE(review): presumably requires the "cmudict" corpus data to be installed
# beforehand (e.g. via nltk.download) -- confirm for the target environment.
pronunciations = nltk.corpus.cmudict.dict()
def get_pronun(word):
    """Look up the pronunciation of ``word`` in the module-level table.

    The lookup is case-insensitive: the word is lower-cased before it is
    used as a key. When several pronunciations are stored for a word, only
    the first one is used.

    Args:
        word: The word to look up.

    Returns:
        The first pronunciation stored for the word, or the placeholder
        ``["?0"]`` when the word is absent or has no stored pronunciations.
    """
    candidates = pronunciations.get(word.lower())
    # A missing key (None) and an empty entry are both treated as "unknown".
    return candidates[0] if candidates else ["?0"]
def get_stress(word):
    """Return the stress markers of ``word`` as one string of digits.

    Every element of the pronunciation returned by ``get_pronun`` whose last
    character is a digit contributes that digit; elements that do not end in
    a digit are skipped.

    Args:
        word: The word whose stress pattern is wanted.

    Returns:
        A string made of the collected digits, in pronunciation order
        (empty if no element ends in a digit).
    """
    digits = []
    for phoneme in get_pronun(word):
        marker = phoneme[-1]
        # Only elements carrying a trailing stress digit are kept.
        if marker.isdigit():
            digits.append(marker)
    return ''.join(digits)
def phrase_stress_pattern(phrase, as_string=True):
    """Compute the stress pattern of every token in ``phrase``.

    The phrase is split with ``nltk.word_tokenize`` and each resulting token
    is passed to ``get_stress``.

    Args:
        phrase: Text to analyse.
        as_string: When true (the default), join the per-token stress
            strings with ``'-'``; otherwise return them as a list.

    Returns:
        Either a single ``'-'``-separated string or a list with one stress
        string per token, depending on ``as_string``.
    """
    stresses = []
    for token in nltk.word_tokenize(phrase):
        stresses.append(get_stress(token))

    if not as_string:
        return stresses
    return '-'.join(stresses)
def create_corpus():
    """Collect the raw tokens of a fixed set of NLTK Gutenberg texts.

    Each text is read with ``nltk.corpus.gutenberg.raw``, its newlines are
    turned into spaces, and the result is split on single spaces.

    Returns:
        A flat list with the tokens of all texts, in source order.
    """
    sources = (
        'austen-emma.txt',
        'austen-persuasion.txt',
        'austen-sense.txt',
        'bible-kjv.txt',
        'blake-poems.txt',
        'bryant-stories.txt',
        'burgess-busterbrown.txt',
        'carroll-alice.txt',
        'chesterton-ball.txt',
        'chesterton-brown.txt',
        'chesterton-thursday.txt',
        'edgeworth-parents.txt',
        'melville-moby_dick.txt',
        'milton-paradise.txt',
        'shakespeare-caesar.txt',
        'shakespeare-hamlet.txt',
        'shakespeare-macbeth.txt',
    )
    words = []
    for source in sources:
        text = nltk.corpus.gutenberg.raw(source)
        # Splitting on a single space (not on arbitrary whitespace) means
        # consecutive spaces/newlines yield empty-string tokens and any
        # punctuation stays attached to its word.
        words.extend(text.replace('\n', ' ').split(' '))
    return words
def create_dataframe():
    """Build the table of candidate words/phrases and their stress patterns.

    One row is produced for every token returned by ``create_corpus`` (so
    repeated tokens appear repeatedly), followed by a handful of hard-coded
    multi-word entries with hand-assigned stress patterns.

    Returns:
        A ``pandas.DataFrame`` with the columns ``stress`` and ``word``.
    """
    corpus_rows = [(get_stress(token), token) for token in create_corpus()]

    # Hand-written entries as (stress, word) pairs, appended after the
    # corpus rows.
    handwritten_rows = [
        ('10101', 'unparentheses'),
        ('0101', 'and more and more'),
        ('1001', 'melody fat'),
        ('10111', 'ready no new far'),
        ('111', 'bad bad bad'),
        ('10010', 'never a faster'),
        ('0111', 'but no no no'),
        ('00', 'sh sh'),
    ]

    return pd.DataFrame(
        corpus_rows + handwritten_rows, columns=['stress', 'word']
    )
# Lazily built word/stress table shared by all calls; see
# _get_stress_dataframe().
_STRESS_DATAFRAME = None


def _get_stress_dataframe():
    """Return the word/stress table, building it only on first use."""
    global _STRESS_DATAFRAME
    if _STRESS_DATAFRAME is None:
        _STRESS_DATAFRAME = create_dataframe()
    return _STRESS_DATAFRAME


def generate_line_matching_syllables(line):
    """Pick random words whose stress patterns match a randomly chosen split.

    ``get_accentuated_variations`` is asked for the possible ways to split
    ``line`` into per-word stress strings (2 to 5 syllables per word). An
    example of its output recorded in the original source:

        [['101', '0101'], ['1010', '101'], ['10', '10101'], ...]

    One variation is chosen at random, and for each stress string in it a
    random row with exactly that stress is sampled from the word table.

    Args:
        line: The stress pattern of a lyric line, as passed by the script's
            main section (the output of ``phrase_stress_pattern``).

    Returns:
        A list with one entry per stress string of the chosen variation.
        When no row matches a stress string, the anchored pattern is printed
        and the placeholder ``'No'`` is used instead.

    Raises:
        IndexError: If ``get_accentuated_variations`` returns an empty
            sequence (raised by ``random.choice``).
    """
    variations = get_accentuated_variations(
        line, min_word_syllables=2, max_word_syllables=5
    )
    variation = random.choice(variations)

    # The table is derived from the full corpus, so it is built once and
    # reused instead of being recreated for every lyric line.
    df = _get_stress_dataframe()

    words = []
    for target in variation:
        # Anchor the pattern so only rows with exactly this stress match.
        pattern = "^" + target + "$"
        options = df.loc[df.stress.str.match(pattern), 'word']
        if options.empty:
            # Explicit "no candidates" branch instead of a bare ``except``
            # around ``sample``, which would also hide unrelated errors.
            print(pattern)
            words.append('No')
        else:
            words.append(options.sample(1).values[0])
    return words
# ['1010', '101']
if __name__ == "__main__":
    # Lyrics used as the rhythmic template; the two-line refrain recurs
    # three times, so it is written once and reused.
    refrain = [
        "Twinkle twinkle little star",
        "How I wonder what you are",
    ]
    song = (
        refrain
        + [
            "Up above the world so high",
            "Like a diamond in the sky",
        ]
        + refrain
        + [
            "When the blazing sun is gone",
            "When he nothing shines upon",
            "Then you show your little light",
            "Twinkle twinkle all the night",
        ]
        + refrain
    )

    # Step 1: turn each lyric line into its stress pattern.
    parsed_song = [phrase_stress_pattern(lyric) for lyric in song]
    # print(parsed_song)

    # Step 2: generate replacement words for each stress pattern.
    new_song = [
        generate_line_matching_syllables(pattern) for pattern in parsed_song
    ]
    print(new_song)