1+ import numpy as np
2+ from scipy .stats import pearsonr
3+
# Empirical terminal-symbol chains (observed sequences).
empirical_chains = [
    ['KBG', 'VBG', 'KBBd', 'VBBd', 'KBA', 'VBA', 'KBBd', 'VBBd', 'KBA', 'VAA', 'KAA', 'VAV', 'KAV'],
    ['VBG', 'KBBd', 'VBBd', 'VAA', 'KAA', 'VBG', 'KBBd', 'VAA', 'KAA'],
    ['KBBd', 'VBBd', 'VAA', 'KAA'],
    ['KBBd', 'VBBd', 'KBA', 'VBA', 'KBBd', 'VBA', 'KAE', 'VAE', 'KAA', 'VAV', 'KAV'],
    ['KAV', 'KBBd', 'VBBd', 'KBBd', 'VAA', 'KAV'],
    ['KBG', 'VBG', 'KBBd', 'VBBd', 'KAA'],
    ['KBBd', 'VBBd', 'KBA', 'VAA', 'KAA'],
    ['KBG', 'VBBd', 'KBBd', 'VBA', 'VAA', 'KAA', 'VAV', 'KAV'],
]
15+
# Count first-order transitions between adjacent terminal symbols.
# zip(chain, chain[1:]) pairs each symbol with its successor; setdefault/get
# replace the manual "if key not in dict" initialization dance.
transitions = {}
for chain in empirical_chains:
    for src, dst in zip(chain, chain[1:]):
        transitions.setdefault(src, {})
        transitions[src][dst] = transitions[src].get(dst, 0) + 1

# Normalize the counts into per-source transition probabilities (each row sums to 1).
probabilities = {}
for src, targets in transitions.items():
    total = sum(targets.values())
    probabilities[src] = {dst: count / total for dst, count in targets.items()}

# Terminal alphabet in deterministic first-occurrence order. The original
# list(set(...)) order varies between runs (string-hash randomization), which
# made the frequency-array layout non-reproducible; dict.fromkeys keeps
# insertion order and dedupes.
terminal_symbols = list(dict.fromkeys(sym for chain in empirical_chains for sym in chain))
start_symbol = empirical_chains[0][0]
36+
# Chain generation based on the (probabilistic) grammar.
def generate_chain(max_length=10, start=None, probs=None):
    """Generate one terminal chain by sampling the transition probabilities.

    Starts from *start* and repeatedly samples the next symbol from the
    current symbol's outgoing-probability row until either *max_length*
    symbols have been produced or a symbol with no outgoing transitions
    is reached.

    Args:
        max_length: Maximum number of symbols in the generated chain.
        start: Initial symbol; defaults to the module-level ``start_symbol``.
        probs: Transition table ``{src: {dst: prob}}``; defaults to the
            module-level ``probabilities``. Each row must sum to 1.

    Returns:
        List of symbols, beginning with the start symbol.
    """
    if probs is None:
        probs = probabilities
    chain = [start_symbol if start is None else start]
    while len(chain) < max_length:
        current = chain[-1]
        # Absorbing symbol (no outgoing transitions): stop.
        if current not in probs:
            break
        row = probs[current]
        next_symbol = np.random.choice(list(row.keys()), p=list(row.values()))
        chain.append(next_symbol)
    return chain
49+
# Relative frequency computation over a set of chains.
def compute_frequencies(chains, terminals):
    """Return the relative frequency of each terminal symbol across *chains*.

    Symbols not listed in *terminals* are ignored. The result is a NumPy
    array aligned with the order of *terminals*; it sums to 1 unless no
    known symbol occurs at all (then it is all zeros).
    """
    index_of = {term: pos for pos, term in enumerate(terminals)}
    counts = np.zeros(len(terminals))
    for symbol in (s for chain in chains for s in chain):
        if symbol in index_of:
            counts[index_of[symbol]] += 1
    grand_total = counts.sum()
    return counts / grand_total if grand_total > 0 else counts
65+
# Iterative optimization: repeatedly sample artificial chains and nudge the
# grammar's transition probabilities until the generated terminal-frequency
# profile correlates strongly (r >= 0.9, p < 0.05) with the empirical one.
max_iterations = 1000
tolerance = 0.01  # step size of the probability adjustment per iteration
best_correlation = 0
best_significance = 1

# Relative frequencies of the empirical chains (fixed target profile).
empirical_frequencies = compute_frequencies(empirical_chains, terminal_symbols)

# Hoisted out of the loop: O(1) symbol -> array-index lookup instead of the
# repeated O(n) terminal_symbols.index(...) calls in the adjustment step.
symbol_index = {term: i for i, term in enumerate(terminal_symbols)}

for iteration in range(max_iterations):
    # Generate 8 artificial chains from the current grammar.
    generated_chains = [generate_chain() for _ in range(8)]

    # Relative frequencies of the generated chains.
    generated_frequencies = compute_frequencies(generated_chains, terminal_symbols)

    # Pearson correlation between empirical and generated frequency profiles.
    correlation, p_value = pearsonr(empirical_frequencies, generated_frequencies)

    print(f"Iteration {iteration + 1}, Korrelation: {correlation:.3f}, Signifikanz: {p_value:.3f}")

    # Stop as soon as correlation and significance are acceptable.
    if correlation >= 0.9 and p_value < 0.05:
        best_correlation = correlation
        best_significance = p_value
        break

    # Nudge every transition probability toward the empirical frequency of
    # its target symbol, clamped to [0, 1].
    for src in probabilities:
        for dst in probabilities[src]:
            error = empirical_frequencies[symbol_index[dst]] - generated_frequencies[symbol_index[dst]]
            probabilities[src][dst] = max(0, min(1, probabilities[src][dst] + error * tolerance))

    # Re-normalize each row so its outgoing probabilities sum to 1 again.
    for src in probabilities:
        row_total = sum(probabilities[src].values())
        if row_total > 0:
            probabilities[src] = {dst: p / row_total for dst, p in probabilities[src].items()}
110+
# Print the optimized grammar and the best correlation found.
print("\nOptimierte probabilistische Grammatik:")
# 'outgoing' instead of the original loop variable 'transitions', which
# shadowed (and rebound) the module-level 'transitions' counts dict.
for start, outgoing in probabilities.items():
    print(f"{start} → {outgoing}")

print(f"\nBeste Korrelation: {best_correlation:.3f}, Signifikanz: {best_significance:.3f}")
117+ #Iteration 1, Korrelation: 0.925, Signifikanz: 0.000
118+ #
119+ #Optimierte probabilistische Grammatik:
120+ #KBG → {'VBG': 0.6666666666666666, 'VBBd': 0.3333333333333333}
121+ #VBG → {'KBBd': 1.0}
122+ #KBBd → {'VBBd': 0.6666666666666666, 'VAA': 0.16666666666666666, 'VBA': 0.16666666666666666}
123+ #VBBd → {'KBA': 0.4444444444444444, 'VAA': 0.2222222222222222, 'KBBd': 0.2222222222222222, 'KAA': 0.1111111111111111}
124+ #KBA → {'VBA': 0.5, 'VAA': 0.5}
125+ #VBA → {'KBBd': 0.5, 'KAE': 0.25, 'VAA': 0.25}
126+ #VAA → {'KAA': 0.8571428571428571, 'KAV': 0.14285714285714285}
127+ #KAA → {'VAV': 0.75, 'VBG': 0.25}
128+ #VAV → {'KAV': 1.0}
129+ #KAE → {'VAE': 1.0}
130+ #VAE → {'KAA': 1.0}
131+ #KAV → {'KBBd': 1.0}
132+ #
133+ #Beste Korrelation: 0.925, Signifikanz: 0.000
# (removed GitHub page artifact "0 commit comments" — it was not valid Python)