Skip to content

Commit d77f346

Browse files
Add files via upload
1 parent 4493f6e commit d77f346

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed

ARS20AchtTransripte.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import numpy as np
2+
from scipy.stats import pearsonr
3+
4+
# Empirical terminal-symbol chains (one list per coded transcript).
empirical_chains = [
    ['KBG', 'VBG', 'KBBd', 'VBBd', 'KBA', 'VBA', 'KBBd', 'VBBd', 'KBA', 'VAA', 'KAA', 'VAV', 'KAV'],
    ['VBG', 'KBBd', 'VBBd', 'VAA', 'KAA', 'VBG', 'KBBd', 'VAA', 'KAA'],
    ['KBBd', 'VBBd', 'VAA', 'KAA'],
    ['KBBd', 'VBBd', 'KBA', 'VBA', 'KBBd', 'VBA', 'KAE', 'VAE', 'KAA', 'VAV', 'KAV'],
    ['KAV', 'KBBd', 'VBBd', 'KBBd', 'VAA', 'KAV'],
    ['KBG', 'VBG', 'KBBd', 'VBBd', 'KAA'],
    ['KBBd', 'VBBd', 'KBA', 'VAA', 'KAA'],
    ['KBG', 'VBBd', 'KBBd', 'VBA', 'VAA', 'KAA', 'VAV', 'KAV'],
]

# Count bigram transitions across all chains: transitions[src][dst] = count.
transitions = {}
for chain in empirical_chains:
    for src, dst in zip(chain, chain[1:]):
        bucket = transitions.setdefault(src, {})
        bucket[dst] = bucket.get(dst, 0) + 1

# Normalise the counts into per-source transition probabilities.
probabilities = {}
for src, counts in transitions.items():
    total = sum(counts.values())
    probabilities[src] = {dst: n / total for dst, n in counts.items()}

# Alphabet of terminal symbols and the grammar's start symbol
# (first symbol of the first empirical chain).
terminal_symbols = list({symbol for chain in empirical_chains for symbol in chain})
start_symbol = empirical_chains[0][0]
36+
37+
# Function that generates artificial chains from the (probabilistic) grammar.
def generate_chain(max_length=10, probs=None, start=None):
    """Generate an artificial terminal chain by a random walk over the
    transition table.

    Parameters
    ----------
    max_length : int
        Upper bound on the chain length (default 10, as in the original).
    probs : dict or None
        Transition table ``{state: {next_state: probability}}``. Defaults
        to the module-level ``probabilities`` (backward compatible).
    start : str or None
        First symbol of the chain. Defaults to the module-level
        ``start_symbol`` (backward compatible).

    Returns
    -------
    list
        The generated symbols. The walk stops early as soon as the current
        symbol has no outgoing transitions.
    """
    if probs is None:
        probs = probabilities
    if start is None:
        start = start_symbol
    chain = [start]
    while len(chain) < max_length:
        current = chain[-1]
        if current not in probs:
            # Absorbing symbol: no outgoing transitions recorded.
            # (The original additionally re-checked this right after the
            # append, which was redundant — this single check covers it.)
            break
        # np.random.choice requires the weights in p to sum to 1; the
        # table is re-normalised after every adjustment step in the
        # optimisation loop.
        next_symbol = np.random.choice(
            list(probs[current].keys()), p=list(probs[current].values())
        )
        chain.append(next_symbol)
    return chain
49+
50+
# Function that computes relative frequencies of terminal symbols.
def compute_frequencies(chains, terminals):
    """Return the relative frequency of each terminal symbol.

    Parameters
    ----------
    chains : iterable of lists
        Symbol chains to tally.
    terminals : list
        Terminal symbols defining the order of the result vector; symbols
        not listed here are ignored.

    Returns
    -------
    numpy.ndarray
        Vector of relative frequencies aligned with ``terminals``
        (all zeros if no symbol was counted).
    """
    index = {term: i for i, term in enumerate(terminals)}
    counts = np.zeros(len(terminals))

    for symbol in (s for chain in chains for s in chain):
        if symbol in index:
            counts[index[symbol]] += 1

    total = counts.sum()
    # Normalise to relative frequencies; keep the zero vector as-is.
    return counts / total if total > 0 else counts
65+
66+
# --- Iterative optimisation of the transition probabilities ---
max_iterations = 1000
tolerance = 0.01  # step size for the error-driven probability adjustment
best_correlation = 0
best_significance = 1

# Relative frequencies of the empirical chains (fixed reference vector).
empirical_frequencies = compute_frequencies(empirical_chains, terminal_symbols)

for iteration in range(max_iterations):
    # Generate 8 artificial chains (matching the number of empirical ones).
    generated_chains = [generate_chain() for _ in range(8)]

    # Relative frequencies of the generated chains.
    generated_frequencies = compute_frequencies(generated_chains, terminal_symbols)

    # Correlate generated vs. empirical frequency vectors.
    correlation, p_value = pearsonr(empirical_frequencies, generated_frequencies)

    print(f"Iteration {iteration + 1}, Korrelation: {correlation:.3f}, Signifikanz: {p_value:.3f}")

    # BUGFIX: track the running best. The original only recorded
    # best_correlation/best_significance when the stop criterion below was
    # met, so if 1000 iterations never reached it the final report claimed
    # a best correlation of 0.000 regardless of what was observed.
    if correlation > best_correlation:
        best_correlation = correlation
        best_significance = p_value

    # Stop once correlation and significance are acceptable.
    if correlation >= 0.9 and p_value < 0.05:
        break

    # Adjust the probabilities based on the frequency mismatch.
    # NOTE(review): the "error" compares *terminal-symbol* frequencies, not
    # transition frequencies — every edge ending in `end` is nudged by the
    # same amount. This mirrors the original heuristic; confirm it is the
    # intended update rule.
    for start in probabilities:
        for end in probabilities[start]:
            empirical_prob = empirical_frequencies[terminal_symbols.index(end)]
            generated_prob = generated_frequencies[terminal_symbols.index(end)]
            error = empirical_prob - generated_prob

            # Nudge the probability and clamp it to [0, 1].
            probabilities[start][end] += error * tolerance
            probabilities[start][end] = max(0, min(1, probabilities[start][end]))

    # Re-normalise each row so np.random.choice gets a valid distribution.
    for start in probabilities:
        total = sum(probabilities[start].values())
        if total > 0:
            probabilities[start] = {end: prob / total for end, prob in probabilities[start].items()}
110+
111+
# --- Print the optimised grammar and the best result ---
print("\nOptimierte probabilistische Grammatik:")
# BUGFIX: (a) the loop variable was named `transitions`, shadowing the
# module-level transition-count table; (b) the f-string was
# f"{start}{transitions}" with no separator, which cannot produce the
# recorded output below ("KBG → {...}") — the " → " was evidently lost.
for start, rules in probabilities.items():
    print(f"{start} → {rules}")

print(f"\nBeste Korrelation: {best_correlation:.3f}, Signifikanz: {best_significance:.3f}")
# Recorded output of a previous run (kept verbatim for reference):
#Iteration 1, Korrelation: 0.925, Signifikanz: 0.000
#
#Optimierte probabilistische Grammatik:
#KBG → {'VBG': 0.6666666666666666, 'VBBd': 0.3333333333333333}
#VBG → {'KBBd': 1.0}
#KBBd → {'VBBd': 0.6666666666666666, 'VAA': 0.16666666666666666, 'VBA': 0.16666666666666666}
#VBBd → {'KBA': 0.4444444444444444, 'VAA': 0.2222222222222222, 'KBBd': 0.2222222222222222, 'KAA': 0.1111111111111111}
#KBA → {'VBA': 0.5, 'VAA': 0.5}
#VBA → {'KBBd': 0.5, 'KAE': 0.25, 'VAA': 0.25}
#VAA → {'KAA': 0.8571428571428571, 'KAV': 0.14285714285714285}
#KAA → {'VAV': 0.75, 'VBG': 0.25}
#VAV → {'KAV': 1.0}
#KAE → {'VAE': 1.0}
#VAE → {'KAA': 1.0}
#KAV → {'KBBd': 1.0}
#
#Beste Korrelation: 0.925, Signifikanz: 0.000

0 commit comments

Comments
 (0)