@@ -54,11 +54,11 @@ class RowspanHeader:
5454
5555
5656def process_verb_table (
57- wxr : WiktextractContext , word_entry : WordEntry , template_node : TemplateNode
57+ wxr : WiktextractContext , word_entry : WordEntry , t_node : TemplateNode
5858) -> None :
5959 # Vorlage:Deutsch Verb Übersicht
6060 expanded_template = wxr .wtp .parse (
61- wxr .wtp .node_to_wikitext (template_node ), expand_all = True
61+ wxr .wtp .node_to_wikitext (t_node ), expand_all = True
6262 )
6363 table_nodes = list (expanded_template .find_child (NodeKind .TABLE ))
6464 if len (table_nodes ) == 0 :
@@ -137,20 +137,22 @@ def process_verb_table(
137137
138138
139139def process_noun_table (
140- wxr : WiktextractContext , word_entry : WordEntry , template_node : TemplateNode
140+ wxr : WiktextractContext , word_entry : WordEntry , t_node : TemplateNode
141141) -> None :
142142 # Vorlage:Deutsch Substantiv Übersicht
143143 from .page import extract_note_section
144144
145145 expanded_template = wxr .wtp .parse (
146- wxr .wtp .node_to_wikitext (template_node ), expand_all = True
146+ wxr .wtp .node_to_wikitext (t_node ), expand_all = True
147147 )
148148 table_nodes = list (expanded_template .find_child (NodeKind .TABLE ))
149149 if len (table_nodes ) == 0 :
150150 return
151151 table_node = table_nodes [0 ]
152152 column_headers = []
153153 table_header = ""
154+ forms = []
155+ flexion_pages = []
154156 for table_row in table_node .find_child (NodeKind .TABLE_ROW ):
155157 row_header = ""
156158 is_header_row = not table_row .contain_node (NodeKind .TABLE_CELL )
@@ -188,7 +190,7 @@ def process_noun_table(
188190 for link_node in table_cell .find_child (NodeKind .LINK ):
189191 link_text = clean_node (wxr , None , link_node )
190192 if link_text .startswith ("Flexion:" ):
191- parse_flexion_page ( wxr , word_entry , link_text )
193+ flexion_pages . append ( link_text )
192194 else :
193195 for form_text in cell_text .splitlines ():
194196 form_text = form_text .strip ()
@@ -209,10 +211,15 @@ def process_noun_table(
209211 ):
210212 form .raw_tags .append (col_header .text )
211213 translate_raw_tags (form )
212- word_entry . forms .append (form )
214+ forms .append (form )
213215 col_index += 1
214216
217+ if t_node .template_name == "Deutsch Substantiv Übersicht" :
218+ forms = seprarte_de_article (wxr , forms )
219+ word_entry .forms .extend (forms )
215220 clean_node (wxr , word_entry , expanded_template ) # category links
221+ for flexion_page in flexion_pages :
222+ parse_flexion_page (wxr , word_entry , flexion_page )
216223 # Vorlage:Deutsch Nachname Übersicht
217224 for level_node in expanded_template .find_child (NodeKind .LEVEL4 ):
218225 section_text = clean_node (wxr , None , level_node .largs )
@@ -326,3 +333,19 @@ def extract_pronoun_table(
326333 word_entry .forms .append (form )
327334 article = ""
328335 col_index += 1
336+
337+
def seprarte_de_article(
    wxr: WiktextractContext, forms: list[Form]
) -> list[Form]:
    """Split a leading German definite article off each form.

    When a form's text starts with one of "der", "die", "das", "den",
    "dem" or "des" followed by whitespace, the article is moved into
    ``form.article`` and removed from ``form.form``. The given ``Form``
    objects are modified in place.

    Forms whose (possibly stripped) text equals the current page title
    ``wxr.wtp.title`` are left out of the result.

    Returns a new list holding the remaining forms in their original order.
    """
    # https://de.wiktionary.org/wiki/Vorlage:Deutsch_Substantiv_Übersicht
    # https://en.wikipedia.org/wiki/German_articles
    article_prefix = re.compile(r"(der|die|das|den|dem|des)\s+")
    kept = []
    for entry in forms:
        found = article_prefix.match(entry.form)
        if found is not None:
            # Slice before storing the article so only the bare word remains.
            entry.form = entry.form[found.end():]
            entry.article = found.group(1)
        if entry.form == wxr.wtp.title:
            # Same as the page title once the article is gone: skip it.
            continue
        kept.append(entry)
    return kept
0 commit comments