|
1 | | -from wikitextprocessor import WikiNode |
2 | | -from wikitextprocessor.parser import LEVEL_KIND_FLAGS, TemplateNode |
| 1 | +from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode |
3 | 2 |
|
4 | 3 | from ...page import clean_node |
5 | 4 | from ...wxr_context import WiktextractContext |
6 | 5 | from .models import WordEntry |
7 | 6 |
|
8 | 7 |
|
def extract_etymology(
    wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
):
    """Fill ``word_entry.etymology_texts`` from an etymology section.

    The direct children of ``level_node`` are scanned in order until the
    first nested ``LevelNode`` is met; nothing from that point on is read.

    * Every ``LIST_ITEM`` of a ``LIST`` child is passed to ``clean_node``
      on its own, and each non-empty result is appended as a separate
      entry as soon as it is seen.
    * ``improve`` templates are skipped.
    * All other children are gathered and passed to ``clean_node``
      together after the scan; a non-empty result is appended as a single
      entry, so it always comes after any list-item entries.

    Args:
        wxr: Extraction context, forwarded to ``clean_node``.
        word_entry: Entry whose ``etymology_texts`` list is appended to.
        level_node: Section node whose ``children`` are scanned.
    """
    loose_nodes = []
    for child in level_node.children:
        if isinstance(child, LevelNode):
            # A nested heading ends this section's own content.
            break

        if isinstance(child, WikiNode) and child.kind == NodeKind.LIST:
            for item in child.find_child(NodeKind.LIST_ITEM):
                item_text = clean_node(wxr, word_entry, item.children)
                if item_text != "":
                    word_entry.etymology_texts.append(item_text)
            continue

        is_improve_template = (
            isinstance(child, TemplateNode)
            and child.template_name == "improve"
        )
        if is_improve_template:
            continue

        loose_nodes.append(child)

    if not loose_nodes:
        return

    # Everything that was not a list is cleaned as one run of nodes.
    combined_text = clean_node(wxr, word_entry, loose_nodes)
    if combined_text != "":
        word_entry.etymology_texts.append(combined_text)
0 commit comments