Skip to content

Commit 16f6bc9

Browse files
authored
[fr] fix wrong form link node extracted in page "adishatz" (#1551)
1 parent 7487129 commit 16f6bc9

File tree

2 files changed, with 47 additions and 3 deletions.

2 files changed

+47
-3
lines changed

src/wiktextract/extractor/fr/linkage.py

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -194,9 +194,27 @@ def extract_linkage_list_item(
194194
],
195195
).strip("— \n")
196196
break
197-
elif tag_text.strip().startswith(":"):
198-
sense_text = tag_text.strip().removeprefix(":").strip()
199-
linkage_data.sense = sense_text
197+
elif tag_text.lstrip().startswith(":"):
198+
linkage_data.sense = clean_node(
199+
wxr,
200+
None,
201+
[tag_text.lstrip().removeprefix(":").lstrip()]
202+
+ [
203+
n
204+
for n in list_item.children[index + 1 :]
205+
if not (
206+
(
207+
isinstance(n, TemplateNode)
208+
and n.template_name == "réf"
209+
)
210+
or (
211+
isinstance(n, WikiNode)
212+
and n.kind == NodeKind.LIST
213+
)
214+
)
215+
],
216+
)
217+
break
200218
else:
201219
tags, _ = capture_text_in_parentheses(tag_text)
202220
for tag in tags:

tests/test_fr_linkage.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -448,3 +448,29 @@ def test_zh_l_forms(self):
448448
self.assertEqual(
449449
data[0]["forms"], [{"form": "霍加狓", "roman": "huòjiāpí"}]
450450
)
451+
452+
def test_link_node_after_linkage_word(self):
453+
self.wxr.wtp.add_page("Modèle:oc gascon", 10, "''(Gascon)''")
454+
self.wxr.wtp.add_page(
455+
"Modèle:w", 10, "[[w:Michel Serres|Michel Serres]]"
456+
)
457+
self.wxr.wtp.add_page("Modèle:ISBN", 10, "ISBN 978-2746522138")
458+
data = parse_page(
459+
self.wxr,
460+
"adishatz",
461+
"""== {{langue|oc}} ==
462+
=== {{S|interjection|oc}} ===
463+
# Terme de salutation
464+
==== {{S|variantes}} ====
465+
* [[adichatz]] {{oc gascon|nocat=1}} : le philosophe, écrivain, épistémologue et académicien français d'origine [[gascon|gasconne]] {{w|Michel Serres}} a écrit un livre [[testamentaire]] intitulé ''Adichats ! (Adieu !)'' (publication posthume en 2020, {{ISBN|978-2746522138}}).""",
466+
)
467+
self.assertEqual(
468+
data[0]["forms"],
469+
[
470+
{
471+
"form": "adichatz",
472+
"raw_tags": ["Gascon"],
473+
"sense": "le philosophe, écrivain, épistémologue et académicien français d'origine gasconne Michel Serres a écrit un livre testamentaire intitulé Adichats ! (Adieu !) (publication posthume en 2020, ISBN 978-2746522138).",
474+
}
475+
],
476+
)

0 commit comments

Comments
 (0)