Skip to content

Commit 452b35a

Browse files
authored
[ku] extract "kîte" hyphenation template (#1522)
1 parent 91a82b2 commit 452b35a

File tree

2 files changed

+57
-0
lines changed

2 files changed

+57
-0
lines changed

src/wiktextract/extractor/ku/sound.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def extract_sound_section(
1818
def extract_sound_list_item(
1919
wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
2020
) -> None:
21+
raw_tags = []
2122
for node in list_item.children:
2223
if isinstance(node, TemplateNode):
2324
if node.template_name in ["ku-IPA", "IPA-ku"]:
@@ -26,6 +27,12 @@ def extract_sound_list_item(
2627
extract_deng_template(wxr, word_entry, node)
2728
elif node.template_name == "ku-kîte":
2829
extract_ku_kîte(wxr, word_entry, node)
30+
elif node.template_name == "kîte":
31+
extract_kîte_template(wxr, word_entry, node, raw_tags)
32+
elif node.template_name.endswith("."):
33+
raw_tag = clean_node(wxr, None, node).removesuffix(":")
34+
if raw_tag != "":
35+
raw_tags.append(raw_tag)
2936
elif isinstance(node, WikiNode) and node.kind == NodeKind.LIST:
3037
for child_list_item in node.find_child(NodeKind.LIST_ITEM):
3138
extract_sound_list_item(wxr, word_entry, child_list_item)
@@ -90,3 +97,23 @@ def extract_ku_kîte(
9097
Hyphenation(parts=hyphenation.split("·"))
9198
)
9299
break
100+
101+
102+
def extract_kîte_template(
    wxr: WiktextractContext,
    word_entry: WordEntry,
    t_node: TemplateNode,
    raw_tags: list[str],
):
    """Extract hyphenation data from a "kîte" template node.

    The template node is turned back into wikitext and re-parsed with full
    expansion. Every ``<span>`` in the expanded tree whose ``lang`` attribute
    equals the template's first positional argument is cleaned to plain
    text; each non-empty text is split on "‧" and stored as a
    ``Hyphenation`` in ``word_entry.hyphenations``.

    Args:
        wxr: Extraction context; ``wxr.wtp`` re-parses the template.
        word_entry: Entry whose ``hyphenations`` list receives the results.
        t_node: The "kîte" template node to process.
        raw_tags: Raw tags handed to every ``Hyphenation`` created here,
            which are then run through ``translate_raw_tags``.
    """
    template_wikitext = wxr.wtp.node_to_wikitext(t_node)
    expanded_root = wxr.wtp.parse(template_wikitext, expand_all=True)

    # The first positional argument is compared against the ``lang``
    # attribute of the spans in the expanded output.
    first_arg = t_node.template_parameters.get(1, "")
    lang_code = clean_node(wxr, None, first_arg)

    lang_spans = expanded_root.find_html(
        "span", attr_name="lang", attr_value=lang_code
    )
    for lang_span in lang_spans:
        syllable_text = clean_node(wxr, None, lang_span)
        if syllable_text == "":
            # Nothing to split; skip spans that clean to an empty string.
            continue
        hyphenation = Hyphenation(
            parts=syllable_text.split("‧"), raw_tags=raw_tags
        )
        translate_raw_tags(hyphenation)
        word_entry.hyphenations.append(hyphenation)

tests/test_ku_sound.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,33 @@ def test_ku_kîte(self):
8686
self.assertEqual(
8787
page_data[0]["hyphenations"], [{"parts": ["lê", "ker"]}]
8888
)
89+
90+
def test_kîte(self):
    """Check hyphenations extracted from two "kîte" templates in one item.

    The list item holds a plain "kîte" template, then a "pj." template,
    then a second "kîte" template; the second hyphenation is expected to
    carry the "plural" tag while the first carries none.
    """
    plural_label_template = '<span style="font-size:95%;">Pirjimar:</span>'
    hyphenation_template = """{{#switch:{{{nivîsna}}}
| 1 = <span class="Latn" lang="la">-{fā‧bu‧lae}-</span>
| #default = [[kîte|Kîtekirin]]: <span class="Latn" lang="la">-{fā‧bu‧la}-</span>
}}"""
    page_wikitext = """== {{ziman|la}} ==
=== Bilêvkirin ===
* {{kîte|la|fā|bu|la}} {{pj.}} {{kîte|la|fā|bu|lae|nivîsna=1}}
=== Navdêr ===
# [[efsane]]"""

    # Register the templates the page relies on, in a fixed order.
    template_pages = (
        ("Şablon:ziman", "Latînî"),
        ("Şablon:pj.", plural_label_template),
        ("Şablon:kîte", hyphenation_template),
    )
    for title, body in template_pages:
        self.wxr.wtp.add_page(title, 10, body)

    page_data = parse_page(self.wxr, "fabula", page_wikitext)

    expected_hyphenations = [
        {"parts": ["fā", "bu", "la"]},
        {"parts": ["fā", "bu", "lae"], "tags": ["plural"]},
    ]
    self.assertEqual(page_data[0]["hyphenations"], expected_hyphenations)

0 commit comments

Comments
 (0)