Skip to content

Commit 4e63683

Browse files
authored
[ru] rename field etymology_text to etymology_texts (#1546)
1 parent 2be7a75 commit 4e63683

File tree

3 files changed

+34
-16
lines changed

3 files changed

+34
-16
lines changed
Lines changed: 20 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,27 @@
1-
from wikitextprocessor import WikiNode
2-
from wikitextprocessor.parser import LEVEL_KIND_FLAGS, TemplateNode
1+
from wikitextprocessor import LevelNode, NodeKind, TemplateNode, WikiNode
32

43
from ...page import clean_node
54
from ...wxr_context import WiktextractContext
65
from .models import WordEntry
76

87

98
def extract_etymology(
10-
wxr: WiktextractContext,
11-
word_entry: WordEntry,
12-
level_node: WikiNode,
13-
) -> None:
14-
etymology_nodes = []
15-
for node in level_node.invert_find_child(
16-
LEVEL_KIND_FLAGS, include_empty_str=True
17-
):
18-
if isinstance(node, TemplateNode) and node.template_name == "improve":
19-
# ignore this template
20-
continue
21-
etymology_nodes.append(node)
22-
word_entry.etymology_text = clean_node(wxr, word_entry, etymology_nodes)
9+
wxr: WiktextractContext, word_entry: WordEntry, level_node: LevelNode
10+
):
11+
e_nodes = []
12+
for node in level_node.children:
13+
if isinstance(node, LevelNode):
14+
break
15+
elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
16+
for list_item in node.find_child(NodeKind.LIST_ITEM):
17+
e_text = clean_node(wxr, word_entry, list_item.children)
18+
if e_text != "":
19+
word_entry.etymology_texts.append(e_text)
20+
elif not (
21+
isinstance(node, TemplateNode) and node.template_name == "improve"
22+
):
23+
e_nodes.append(node)
24+
if len(e_nodes) > 0:
25+
e_str = clean_node(wxr, word_entry, e_nodes)
26+
if e_str != "":
27+
word_entry.etymology_texts.append(e_str)

src/wiktextract/extractor/ru/models.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,7 @@ class WordEntry(BaseModelWrap):
147147
default=[], description="List of coordinate terms"
148148
)
149149
holonyms: list[Linkage] = Field(default=[], description="List of holonyms")
150-
etymology_text: str = ""
150+
etymology_texts: list[str] = []
151151
related: list[Linkage] = []
152152
metagrams: list[Linkage] = []
153153
proverbs: list[Linkage] = []

tests/test_ru_page.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -365,3 +365,16 @@ def test_stressed_form(self):
365365
self.assertEqual(
366366
data[0]["forms"], [{"form": "коса́", "tags": ["stressed"]}]
367367
)
368+
369+
def test_etymology_list(self):
370+
data = parse_page(
371+
self.wxr,
372+
"albast",
373+
"""= {{-nl-}} =
374+
=== Семантические свойства ===
375+
==== Значение ====
376+
# [[алебастр]]
377+
=== Этимология ===
378+
*От [1285] г.""",
379+
)
380+
self.assertEqual(data[0]["etymology_texts"], ["От [1285] г."])

0 commit comments

Comments
 (0)