# ARS20AchtTransripte.py — 133 lines (112 loc) · 5.26 KB
import numpy as np
from scipy.stats import pearsonr
# Empirical terminal-symbol chains (eight transcribed sequences).
empirical_chains = [
    ['KBG', 'VBG', 'KBBd', 'VBBd', 'KBA', 'VBA', 'KBBd', 'VBBd', 'KBA', 'VAA', 'KAA', 'VAV', 'KAV'],
    ['VBG', 'KBBd', 'VBBd', 'VAA', 'KAA', 'VBG', 'KBBd', 'VAA', 'KAA'],
    ['KBBd', 'VBBd', 'VAA', 'KAA'],
    ['KBBd', 'VBBd', 'KBA', 'VBA', 'KBBd', 'VBA', 'KAE', 'VAE', 'KAA', 'VAV', 'KAV'],
    ['KAV', 'KBBd', 'VBBd', 'KBBd', 'VAA', 'KAV'],
    ['KBG', 'VBG', 'KBBd', 'VBBd', 'KAA'],
    ['KBBd', 'VBBd', 'KBA', 'VAA', 'KAA'],
    ['KBG', 'VBBd', 'KBBd', 'VBA', 'VAA', 'KAA', 'VAV', 'KAV']
]

# Count first-order transitions over all chains:
# transitions[a][b] == number of times symbol b immediately follows symbol a.
transitions = {}
for chain in empirical_chains:
    # zip(chain, chain[1:]) yields each adjacent (start, end) pair once.
    for start, end in zip(chain, chain[1:]):
        bucket = transitions.setdefault(start, {})
        bucket[end] = bucket.get(end, 0) + 1
# Normalisation: turn raw transition counts into per-state probability
# distributions, i.e. each row of `probabilities` sums to 1.
probabilities = {}
for source, targets in transitions.items():
    row_total = sum(targets.values())
    probabilities[source] = {symbol: n / row_total for symbol, n in targets.items()}

# Alphabet of terminal symbols (deduplicated) and the fixed start symbol,
# taken from the first empirical chain.
terminal_symbols = list({symbol for chain in empirical_chains for symbol in chain})
start_symbol = empirical_chains[0][0]
# Chain generation based on the probabilistic grammar.
def generate_chain(max_length=10, start=None, probs=None):
    """Generate a symbol chain by a random walk over the transition table.

    Parameters
    ----------
    max_length : int
        Maximum number of symbols in the generated chain.
    start : str, optional
        Start symbol; defaults to the module-level ``start_symbol``.
    probs : dict, optional
        Transition table ``{symbol: {next_symbol: probability}}``; defaults
        to the module-level ``probabilities``.

    Returns
    -------
    list of str
        The generated chain. It ends early when the current symbol has no
        outgoing transitions (absorbing symbol).
    """
    if start is None:
        start = start_symbol
    if probs is None:
        probs = probabilities
    chain = [start]
    while len(chain) < max_length:
        current = chain[-1]
        if current not in probs:
            # Absorbing symbol: nothing can follow it. (This single check
            # also covers the symbol appended on the previous iteration, so
            # no second check after append is needed.)
            break
        options = probs[current]
        next_symbol = np.random.choice(list(options.keys()), p=list(options.values()))
        chain.append(next_symbol)
    return chain
# Relative frequency computation for a set of chains.
def compute_frequencies(chains, terminals):
    """Return the relative frequency of each terminal symbol across *chains*.

    Symbols not listed in *terminals* are ignored. When no listed symbol
    occurs at all, the zero vector is returned unnormalised.
    """
    index_of = {symbol: pos for pos, symbol in enumerate(terminals)}
    counts = np.zeros(len(terminals))
    for sequence in chains:
        for token in sequence:
            pos = index_of.get(token)
            if pos is not None:
                counts[pos] += 1
    grand_total = counts.sum()
    # Normalise to relative frequencies unless everything is zero.
    return counts / grand_total if grand_total > 0 else counts
# Iterative optimisation: nudge the grammar's transition probabilities until
# chains generated from it reproduce the empirical symbol frequencies.
max_iterations = 1000
tolerance = 0.01  # step size for the error-driven probability adjustment
best_correlation = 0
best_significance = 1

# Relative frequencies of the empirical chains (the optimisation target).
empirical_frequencies = compute_frequencies(empirical_chains, terminal_symbols)
# Hoist the symbol -> array-index lookup out of the adjustment loops
# (list.index() would rescan the list on every access).
symbol_index = {symbol: i for i, symbol in enumerate(terminal_symbols)}

for iteration in range(max_iterations):
    # Generate 8 artificial chains from the current grammar.
    generated_chains = [generate_chain() for _ in range(8)]
    # Relative frequencies of the generated chains.
    generated_frequencies = compute_frequencies(generated_chains, terminal_symbols)
    # Correlate generated against empirical frequencies.
    correlation, p_value = pearsonr(empirical_frequencies, generated_frequencies)
    print(f"Iteration {iteration + 1}, Korrelation: {correlation:.3f}, Signifikanz: {p_value:.3f}")
    # Track the best result seen so far, so the final summary is meaningful
    # even when the acceptance criterion below is never met (the original
    # only assigned these on success, reporting 0.000/1.000 otherwise).
    if correlation > best_correlation:
        best_correlation = correlation
        best_significance = p_value
    # Stop once correlation and significance are acceptable.
    if correlation >= 0.9 and p_value < 0.05:
        break
    # Adjust each transition probability proportionally to the frequency
    # error of its target symbol.
    for start in probabilities:
        for end in probabilities[start]:
            error = empirical_frequencies[symbol_index[end]] - generated_frequencies[symbol_index[end]]
            probabilities[start][end] += error * tolerance
            probabilities[start][end] = max(0, min(1, probabilities[start][end]))  # clamp to [0, 1]
    # Re-normalise each row so it remains a probability distribution.
    for start in probabilities:
        total = sum(probabilities[start].values())
        if total > 0:
            probabilities[start] = {end: prob / total for end, prob in probabilities[start].items()}

# Report the optimised grammar. The loop variable is `rules` rather than
# `transitions` to avoid shadowing the module-level transition-count dict.
print("\nOptimierte probabilistische Grammatik:")
for start, rules in probabilities.items():
    print(f"{start} → {rules}")
print(f"\nBeste Korrelation: {best_correlation:.3f}, Signifikanz: {best_significance:.3f}")
#Iteration 1, Korrelation: 0.925, Signifikanz: 0.000
#
#Optimierte probabilistische Grammatik:
#KBG → {'VBG': 0.6666666666666666, 'VBBd': 0.3333333333333333}
#VBG → {'KBBd': 1.0}
#KBBd → {'VBBd': 0.6666666666666666, 'VAA': 0.16666666666666666, 'VBA': 0.16666666666666666}
#VBBd → {'KBA': 0.4444444444444444, 'VAA': 0.2222222222222222, 'KBBd': 0.2222222222222222, 'KAA': 0.1111111111111111}
#KBA → {'VBA': 0.5, 'VAA': 0.5}
#VBA → {'KBBd': 0.5, 'KAE': 0.25, 'VAA': 0.25}
#VAA → {'KAA': 0.8571428571428571, 'KAV': 0.14285714285714285}
#KAA → {'VAV': 0.75, 'VBG': 0.25}
#VAV → {'KAV': 1.0}
#KAE → {'VAE': 1.0}
#VAE → {'KAA': 1.0}
#KAV → {'KBBd': 1.0}
#
#Beste Korrelation: 0.925, Signifikanz: 0.000