模組:zh-usex
This module does the work for {{zh-x}}; see there for more.
Data for this module is found in Module:zh-usex/data.
Informal testcases. Put stuff that isn't working here!
- with
|tr=
:- a,o,e開頭的音節連接在其它音節後面的時候,如果音節的界限發生混淆,用隔音符號(’)隔開,例如:pi’ao(皮襖)。 [標準漢語, 正體]
- From: 1958, 汉语拼音方案 (Scheme for the Chinese Phonetic Alphabet), section 5
- a, o, e kāitóu de yīnjié liánjiē zài qítā yīnjié hòumiàn de shíhou, rúguǒ yīnjié de jièxiàn fāshēng hùnxiáo, yòng géyīn fúhào (’) gékāi, lìrú: pi’ ao (pí'ǎo). [拼音]
- [In Hanyu Pinyin], when a syllable beginning in a, o or e is preceded by another syllable, if the division between the syllables is unclear, use the syllable-dividing apostrophe (’) to divide [the syllables]. For example: pi’ao 皮襖。
a,o,e开头的音节连接在其它音节后面的时候,如果音节的界限发生混淆,用隔音符号(’)隔开,例如:pi’ao(皮袄)。 [標準漢語, 簡體] - without
|tr=
:- a,o,e開頭的音節連接在其它音節後面的時候,如果音節的界限發生混淆,用隔音符號(’)隔開,例如:pi’ao(皮襖)。 [標準漢語, 正體]
- From: 1958, 汉语拼音方案 (Scheme for the Chinese Phonetic Alphabet), section 5
- a, o, e kāitóu de yīnjié liánjiē zài qítā yīnjié hòumiàn de shíhou, rúguǒ yīnjié de jièxiàn fāshēng hùnxiáo, yòng géyīn fúhào (’) gékāi, lìrú: pi’ ao (pí'ǎo). [拼音]
- [In Hanyu Pinyin], when a syllable beginning in a, o or e is preceded by another syllable, if the division between the syllables is unclear, use the syllable-dividing apostrophe (’) to divide [the syllables]. For example: pi’ao 皮襖。
a,o,e开头的音节连接在其它音节后面的时候,如果音节的界限发生混淆,用隔音符号(’)隔开,例如:pi’ao(皮袄)。 [標準漢語, 簡體] - 室外
室外非常
非常寒冷
寒冷,
,大家
大家都
都把
把身體
身体蜷縮
蜷缩起來
起来保暖
保暖。
。[標準漢語, 正體↑ + 簡體↓] [rom.: 拼音] - It's freezing outside, everyone huddle together to keep warm.
- 有一e5歐巴桑去美國chit4-tho5,欲去便所e5時,因為m7捌字,煞行入去查甫e0彼間,無外久,一e5阿督仔行入去,隨擱闖出來,一直喝講:「I am sorry,I am sorry。」尾a0,彼e5阿婆仔行出來氣chua3chua3講:「夭壽哦!一e5阿督仔真無禮貌,行入來人e5便所,也擱怪人門「抑m7鎖咧!」 [臺語, 正體]
- From: 曹麗華 (ed.), 笑詼一則 抑m7鎖咧
- Ū chi̍t ê o͘-bá-sáng khì Bí-kok chhit-thô, beh khì piān-só͘ ê sî, in-ūi m̄ bat-jī, soah kiâⁿ ji̍p-khì cha-po͘ ê hit keng, bô-gōa-kú, chi̍t ê a-tok-á kiâⁿ ji̍p-khì, sûi koh chhoàng chhut-lâi, it-ti̍t hoah kóng: “I am sorry, I am sorry.” Bóe--á, hit-ê a-pô-á kiâⁿ chhut-lâi khì-chhòa-chhòa kóng: “Iáu-siū ô͘! Chi̍t ê a-tok-á chin bô lé-māu, kiâⁿ ji̍p-lâi lâng ê piān-só͘, iá-koh koài lâng mn̂g “a̍h m̄ só--leh!” [白話字]
- There was an old granny who went to America on vacation. When it came time for her to go to the restroom, because she was illiterate, she ended up going into the men's room. Before long, a roundeye walked in, then quickly rushed out as he kept yelling, "I am sorry, I am sorry." Finally, the old lady came out, angrily saying, "Screw him! The roundeye is very rude. He barged into my bathroom, but he still blamed me, saying the door 'wasn't locked!' (a̍h m̄ só--leh, which sounds like "I am sorry")"
有一e5欧巴桑去美国chit4-tho5,欲去便所e5时,因为m7捌字,煞行入去查甫e0彼间,无外久,一e5阿督仔行入去,随搁闯出来,一直喝讲:“I am sorry,I am sorry。”尾a0,彼e5阿婆仔行出来气chua3chua3讲:“夭寿哦!一e5阿督仔真无礼貌,行入来人e5便所,也搁怪人门“抑m7锁咧!” [臺語, 簡體]
-- Helpers from [[Module:zh]]; this file calls m_zh.ts, m_zh.ts_determ,
-- m_zh.py and m_zh.check_pron.
local m_zh = require("Module:zh")
-- Local aliases for the mw.ustring functions. `sub` is declared but not
-- referenced anywhere else in this file.
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local sub = mw.ustring.sub
-- Use this when the actual title needs to be known.
local actual_title = mw.title.getCurrentTitle()
-- Use this when testcases need to be able to override the title (for bolding,
-- for instance).
local title = actual_title
-- Page name that example text is compared against for automatic bolding.
-- Falls back to the current title's text when no global PAGENAME is set.
local PAGENAME = PAGENAME or title.text
-- Table returned at the end of the module; public functions are attached to it.
local export = {}
-- Read-only data tables from the companion data module.
local data = mw.loadData("Module:zh-usex/data")
-- variety_list[key] is indexed below as [1] (label shown in the variety tag),
-- [2] (variety code such as "cmn") and [3] (label shown in the romanisation tag).
local variety_list = data.variety_list
-- Looked up by word; the value is used as that word's transcription.
local punctuation = data.punctuation
-- ref_list[ref] is indexed below as [1] (default variety) and [2] (citation text).
local ref_list = data.ref_list
-- Per-variety replacement tables applied to the text before transcription:
-- pron_correction is applied character by character, polysyllable_pron_correction
-- as pattern -> replacement pairs.
local pron_correction = data.pron_correction
local polysyllable_pron_correction = data.polysyllable_pron_correction
-- HTML wrappers for simplified (Hans) and traditional (Hant) script text.
local zh_format_start_simp = "<span lang=\"zh\" class=\"Hans\">"
local zh_format_start_trad = "<span lang=\"zh\" class=\"Hant\">"
local zh_format_end = "</span>"
-- Background wrapper placed around the whole rendered example.
local bg_format_start = '<div style="background-color:#FCFEFB">'
local bg_format_end = '</div>'
-- Character class for Han characters, used by make_link.
-- NOTE(review): the last range is written "𠀀-" directly before "]", i.e. it
-- has no upper bound, so as written it matches only 𠀀 and a literal hyphen.
-- The upper bound looks lost -- confirm against the upstream module.
local Han_pattern = '[一-鿌㐀-䶵𠀀-]'
-- Byte-level pattern for one UTF-8 encoded character: a lead byte followed by
-- any number of continuation bytes. Lets plain string.gsub step over whole
-- characters.
local UTF8_char = '[%z\1-\127\194-\244][\128-\191]*'
-- Pattern for one balanced "<...>" pair, i.e. a single HTML tag.
local tag = "%b<>"
-- Turn a word into one or more wikilinks pointing at the "#Chinese" section
-- of the target page.
--
-- A hyphen that sits between two Han characters (optionally with an HTML tag
-- between the Han character and the hyphen) splits the word into separate
-- links. <b> and <br> tags are moved outside the link brackets, and bolding is
-- stripped from link targets while being kept in the link text.
--
-- NOTE(review): gsub matches do not overlap, so in "一-二-三" only the first
-- hyphen is turned into a link boundary. Unclear whether that is intended.
local function make_link(word)
	local original = word
	local linked = "[[" .. word .. "]]"

	-- Is the character before the hyphen Han, or the ">" that closes a tag
	-- directly following a Han character?
	local function han_on_left(prev_char, prev_pos)
		if prev_char == ">" then
			local through_tag = linked:sub(1, prev_pos)
			if find(through_tag, Han_pattern .. tag .. "$") then
				return true
			end
		end
		return find(prev_char, Han_pattern)
	end

	-- Is the character after the hyphen Han, or the "<" that opens a tag
	-- directly followed by a Han character?
	local function han_on_right(next_char, next_pos)
		if next_char == "<" then
			local from_tag = linked:sub(next_pos)
			if find(from_tag, "^" .. tag .. Han_pattern) then
				return true
			end
		end
		return find(next_char, Han_pattern)
	end

	-- Replace "-" with "]][[" between Han characters. Returning nothing from
	-- the callback leaves the matched text untouched.
	local hyphen_between_chars = "()(" .. UTF8_char .. ")%-()(" .. UTF8_char .. ")"
	linked = linked:gsub(hyphen_between_chars, function(prev_pos, prev_char, next_pos, next_char)
		if han_on_left(prev_char, prev_pos) and han_on_right(next_char, next_pos) then
			return prev_char .. "]][[" .. next_char
		end
	end)

	-- If an entire word is bolded in a link, move the tags out of the wikilink syntax.
	linked = linked:gsub("(%[%[)(</?b>)(.-)(</?b>)(%]%])", "%2%1%3%5%4")

	-- Move br tags out of links.
	linked = linked:gsub("(%[%[)(<br ?/?>)", "%2%1")

	-- Link to Chinese section.
	-- Remove bolding from link target, leave it in link text.
	linked = linked:gsub("%[%[([^|]-)%]%]", function(inner)
		local target = inner:gsub("</?b>", "")
		return "[[" .. target .. "#Chinese|" .. inner .. "]]"
	end)

	-- Log the conversion when running from a Module: page (testcases/debugging).
	if actual_title.nsText == "Module" then
		mw.log(original, "->", linked)
	end

	return linked
end
--- Render a Chinese usage example or quotation for {{zh-x}}.
--
-- Arguments are read from the parent frame:
--   1                  example text (required)
--   2                  translation
--   3                  variety, a key of variety_list; defaults to the variety
--                      of the reference in ref_list, otherwise "MSC"
--   lit                literal translation (inline layout only)
--   tr                 manual transcription, replaces the automatic one
--   ref / r            reference: a key of ref_list or free text
--   display_type/type  "ruby" selects the ruby layout
--   inline             any non-empty value forces the block layout
--   audio / a          audio file name
--   collapsed          boolean, wraps parts of the block layout in a collapsible
--   link / l           boolean (default true), link each word
--   tr_nocap           boolean, suppresses sentence capitalisation
--   pagename           (Module namespace only) page name override for testcases
--
-- All working state is kept in locals so that repeated calls within one
-- invocation (e.g. from a testcases page) cannot influence each other.
--
-- @param frame  frame object; only frame:getParent().args is read
-- @return       string of wikitext/HTML
-- Raises an error if the example is missing or the variety is not recognised.
function export.show(frame)
	local params = {
		[1] = { required = true }, -- example
		[2] = {}, -- translation
		[3] = {}, -- variety
		lit = {},
		tr = {},
		ref = {}, r = { alias_of = "ref" },
		display_type = {}, type = { alias_of = "display_type" },
		inline = {},
		audio = {}, a = { alias_of = "audio" },
		collapsed = { type = "boolean" },
		link = { type = "boolean", default = true }, l = { alias_of = "link" },
		-- Allow specifying pagename in testcases on documentation page.
		pagename = actual_title.nsText == "Module" and {} or nil,
		tr_nocap = { type = "boolean" },
	}
	local args, unrecognized_args = require("Module:parameters").process(frame:getParent().args, params, true)

	-- Page name used for bolding. Kept in a local (instead of overwriting the
	-- module-level PAGENAME) so a testcase override does not leak into later calls.
	local pagename = PAGENAME
	if args.pagename then
		local override = mw.title.new(args.pagename) or error("Invalid pagename: " .. args.pagename)
		pagename = override.text
	end

	local example = args[1] or error("Example unspecified.")
	local translation = args[2]
	local literal = args["lit"]
	local reference = args["ref"]
	local manual_tr = args["tr"]
	local display = args["display_type"]
	local inline = args["inline"]
	local audio_file = args["audio"]
	local collapsed = args["collapsed"]
	local phonetic = ""
	-- Number of characters in the two listed Han ranges; used further down to
	-- choose between the inline and the block layout.
	local original_length = mw.ustring.len(gsub(example, "[^一-龯㐀-䶵]", ""))
	local variety = args[3] or (ref_list[reference] and ref_list[reference][1] or false) or "MSC"
	local variety_data = variety_list[variety] or error("variety " .. variety .. " not recognized.")
	local variety_code = variety_data[2]
	local link = args["link"]

	if next(unrecognized_args) then
		--[[Special:WhatLinksHere/Template:tracking/zh-usex/unrecognized arg]]
		require("Module:debug").track_unrecognized_args(unrecognized_args, "zh-usex")
	end

	if not translation or translation == '' then -- per standard [[Module:usex]]
		translation = ''
	end

	-- automatically boldify pagetitle if nothing is in bold
	local boldify = not match(example, "'''")
	if boldify and not punctuation[pagename] then
		-- Escape pattern magic characters so that titles containing e.g. "-"
		-- or "." are matched literally.
		local pagename_pattern = gsub(pagename, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
		example = gsub(example, pagename_pattern, function(found)
			return "'''" .. found .. "'''"
		end)
		-- Adjacent bolded occurrences: drop the closing+opening markers between them.
		example = gsub(example, "''''''", "")
	end

	-- tidying up the example, making it ready for transcription
	-- NOTE(review): the class below lists ASCII punctuation next to CJK
	-- punctuation; confirm against the keys of data.punctuation whether
	-- full-width forms were intended here.
	example = gsub(example, "([?!,。、“”…;:‘’|()「」『』—《》〈〉· .~])", " %1 ")
	example = gsub(example, " — — ", " —— ") -- double em-dash (to be converted to single em-dash later)
	example = gsub(example, "^ *", "")
	example = gsub(example, " *$", "")
	example = gsub(example, " +", " ")
	example = gsub(example, "%'%'%'([^%']+)%'%'%'", "<b>%1</b>")
	-- Pull a "[...]" or "{...}" annotation that directly follows a bolded
	-- character inside the bold tags.
	example = gsub(example, "(.)</b>%[([^%[%]]+)%]", function(first, second)
		return "<b>"..first.."</b>" ~= second and first.."["..second.."]</b>" or first.."["..first.."]</b>" end)
	example = gsub(example, "</b>({[^{}]+})", "%1</b>")

	local ruby_start, ruby_mid, ruby_end = "<big><ruby><span class=\"Hani\">", "</span><rp> (</rp><rt><big>", "</big></rt><rp>)</rp></ruby></big>"
	local ruby_words = {}
	local trad_words, simp_words, tr_words = {}, {}, {}

	-- Whether a separate simplified line is produced.
	local simp_exist = (m_zh.ts_determ(gsub(example, "(.)%[%1%]", "")) == "trad" or (match(example, "%[[^%[%]]+%]") and not match(example, "(.)%[%1%]"))) and variety_code ~= "vi"

	-- Strip transcription hints from a word that is about to be displayed.
	local function clean_for_display(text)
		text = gsub(text, "{[^{}]*}", "")   -- {reading} annotations
		text = gsub(text, "[%^%.]", "")     -- capitalisation and syllable markers
		text = gsub(text, "\\", "|")        -- "\" stands in for the link pipe
		-- A full-width full stop yields a literal "." in the output. (Written
		-- with an ASCII "." as the pattern this call would replace every
		-- character of the word with a dot.)
		text = gsub(text, ".", ".")
		return text
	end

	-- Set once the first non-punctuation word has been transcribed and kept
	-- for the rest of the loop (same in-call behaviour as before, but no
	-- longer a global).
	local real_word

	for word in mw.text.gsplit(example, " ", true) do
		if gsub(gsub(word, "%{[^%}]+%}", ""), "%.", "") == pagename and boldify then
			word = "<b>" .. word .. "</b>"
		end

		local trad_word, simp_word, tr_word, ruby_word = word, false, false, ""

		-- various tricks for linking and display in trad. and simp.
		trad_word = gsub(trad_word, "(.)%[(.)%]", "%1")
		trad_word = clean_for_display(trad_word)

		if simp_exist then
			simp_word = match(word, "%[") and gsub(gsub(word .. "占[位]", "([^%[%]]*).%[(.)%]", function(a, b) return m_zh.ts(a) .. b end), "位$", "") or m_zh.ts(word)
			simp_word = clean_for_display(simp_word)
		end

		-- produce links
		local contain_pagename = (gsub(gsub(gsub(trad_word, "</?b>", ""), "%^", ""), "-", "") == pagename) and not punctuation[pagename]
		if match(trad_word, "|") or (link and not match(trad_word, "@") and not punctuation[word] and not contain_pagename) then
			trad_word = make_link(trad_word)
			if simp_exist then
				simp_word = make_link(simp_word)
			end
		end
		-- "@" suppresses linking of a word and is never displayed.
		trad_word = gsub(trad_word, "@", "")
		simp_word = simp_exist and gsub(simp_word, "@", "")

		-- same tricks applied to transcription
		if not manual_tr and (variety_code == "cmn" or variety_code == "yue" or variety_code == "nan" or variety_code == "hak") then
			if punctuation[word] then
				tr_word = punctuation[word]
			else
				real_word = true
				local hyphen = variety_code == "nan" or variety_code == "hak"
				tr_word = gsub(word, "@", "")
				tr_word = gsub(tr_word, "%.", " ")
				tr_word = gsub(tr_word, ".+\\", "")
				tr_word = gsub(tr_word, "%[[^%[%]]+%]", "")
				tr_word = gsub(tr_word, ".</b>(%{[^%}]+%})", "%1</b>")
				-- "X{reading}": the reading replaces the character before the braces.
				tr_word = gsub(tr_word, "(.){([^{}]*)}",function(a, b)
					if hyphen and not mw.ustring.find(a, "[a-zA-Z]") then
						return "-" .. b .. "-"
					else
						return b
					end
				end)
				for key,val in pairs(polysyllable_pron_correction[variety_code]) do
					tr_word = gsub(tr_word, key, val)
				end
				tr_word = gsub(tr_word, ".", pron_correction[variety_code])

				if variety_code == "cmn" then
					tr_word = gsub(tr_word, "%-", "")
					tr_word = m_zh.py(tr_word)

				elseif variety_code == "yue" then
					local m_yue_pron = mw.loadData("Module:zh/data/yue-pron")
					tr_word = gsub(tr_word, ".", m_yue_pron.jyutping)
					tr_word = gsub(tr_word, "([a-z])([1-9])(-?)([1-9]?)", "%1%2%3%4 ")

				elseif hyphen then
					-- Look up each run as a whole first; fall back to
					-- character-by-character lookup joined with hyphens.
					-- NOTE(review): the range " -〿" starts at an ASCII space and
					-- "𠀀-" has no upper bound; both look unintended -- confirm
					-- against the upstream module.
					tr_word = gsub(tr_word, "[一-鿌㐀-䶵 -〿𠀀-]+", function(text)
						if m_zh.check_pron(text, variety_code, 1) then
							return gsub(m_zh.check_pron(text, variety_code, 1), "/.+$", "")
						else
							text = gsub(text, ".", function(ch)
								if m_zh.check_pron(ch, variety_code, 1) then
									return gsub(m_zh.check_pron(ch, variety_code, 1), "/.+$", "") .. "-"
								else
									return ch
								end
							end)
							return gsub(text, "-$", "")
						end
					end)
					-- Clean up hyphens that do not sit between two letters.
					tr_word = gsub(tr_word, "%-+", "-")
					tr_word = gsub(tr_word, "%-([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōṳA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌṲ])", "%1")
					tr_word = gsub(tr_word, "([^ⁿa-záíúéóḿńàìùèòǹâîûêôāīūēōoóòôōṳA-ZÁÍÚÉÓḾŃÀÌÙÈÒǸÂÎÛÊÔĀĪŪĒŌOÓÒÔŌṲ̄̀́̂̍͘])%-", "%1")
					tr_word = gsub(tr_word, "<b>", "-<b>")
					tr_word = gsub(tr_word, "</b>", "</b>-")
					tr_word = gsub(tr_word, "%^%-<b>", "<b>^")
					tr_word = gsub(tr_word, "^%-+", "")
					tr_word = gsub(tr_word, "%-+$", "")
					-- "%" in the input becomes a double hyphen in the transcription.
					tr_word = gsub(tr_word, "%%%-?", "--")
				end

				if match(tr_word, "[一-鿌㐀-䶵𠀀-]") then
					require("Module:debug").track("zh-usex/character without transliteration")
				end
			end
		end

		if variety_code == "nan" then
			trad_word = gsub(trad_word, "%%", "")
			simp_word = simp_exist and gsub(simp_word, "%%", "")
		end

		if display == "ruby" then
			ruby_word = ruby_start .. trad_word .. (simp_exist and "<br>" .. simp_word or "") .. ruby_mid .. (real_word and tr_word or "") .. ruby_end
			table.insert(ruby_words, ruby_word)
		else
			-- A false value becomes nil here, which makes the insert a no-op.
			table.insert(trad_words, trad_word)
			table.insert(simp_words, simp_word or nil)
			table.insert(tr_words, tr_word or nil)
		end
	end

	-- HTML entities are used for the square brackets since "[[[w:MSC|MSC]]" is
	-- interpreted poorly by the wikitext parser.
	local tag_start = " <span style=\"color:darkgreen; font-size:x-small;\">&#91;"
	local tag_end = "&#93;</span>"

	if display == "ruby" then
		-- Built in its own local: the module-level `tag` is the HTML-tag
		-- pattern used by make_link and must not be overwritten.
		local ruby_tag = " <ruby><rb><big>" ..
			tag_start .. variety_data[1] ..
			(simp_exist
				and ", [[Traditional Chinese|正體]]↑ + [[Simplified Chinese|簡體]]↓"
				or ", [[Traditional Chinese|正體]] kap [[Simplified Chinese|簡體]]") .. tag_end ..
			tag_start .. "''rom.'': " .. variety_data[3] .. tag_end ..
			"</big></rb></ruby>"
		return table.concat(ruby_words, "") .. ruby_tag .. "<dl><dd><i>" .. translation .. "</i></dd></dl>"
	end

	-- Re-insert a space between two adjacent links that end/start with a Latin letter.
	local trad_text = gsub(table.concat(trad_words), "([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2")
	local simp_text = simp_exist and gsub(table.concat(simp_words), "([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2") or false
	phonetic = manual_tr or (#tr_words > 0 and table.concat(tr_words, " ") or false)

	-- overall transcription formatting
	if phonetic then
		phonetic = gsub(phonetic, " </b>", "</b> ")
		-- NOTE(review): as written this replaces a space with a space (no-op);
		-- one side was presumably a different whitespace character -- confirm.
		phonetic = gsub(phonetic, " ", " ")
		if variety_code == "yue" or variety_code == "zhx-tai" or variety_code == "zhx-teo" or variety_code == "nan-hai" or variety_code == "cmn-sze" then
			phonetic = gsub(phonetic, "([a-zê]+)([1-9%-]+)", "%1<sup>%2</sup>") -- superscript tones
		end
		phonetic = gsub(phonetic, " ([,%.?!;:’”)])", "%1") -- remove excess spaces from punctuation
		phonetic = gsub(phonetic, "([‘“(]) ", "%1")
		if not manual_tr then
			phonetic = gsub(phonetic, "%'([^%'])", "%1") -- allow bolding for manual translit
			if variety_code == "nan" then
				phonetic = gsub(phonetic, " +%-%-", "--")
			end
		end

		-- capitalisation: mark positions with "^", uppercase the character
		-- after each mark, then remove the marks.
		if not args.tr_nocap and match(example, "[。?!]") then
			phonetic = "^" .. gsub(phonetic, "([%.?!]) ", "%1 ^")
		end
		phonetic = gsub(phonetic, "([%.%?%!][”’]) (.)", "%1 ^%2")
		phonetic = gsub(phonetic, "<br>(.)", "<br>^%1")
		phonetic = gsub(phonetic, ": ([“‘])(.)", ": %1^%2")
		phonetic = gsub(phonetic, "%^<b>", "<b>^")
		phonetic = gsub(phonetic, "%^+.", mw.ustring.upper)
		phonetic = gsub(phonetic, "%^", "")

		if variety_code == "wuu" then
			local wuu_pron = require("Module:wuu-pron")
			phonetic = "<span class=\"IPA\">[" .. wuu_pron.ipa_conv(phonetic) .. "]</span>"
		elseif variety_code == "cmn-wuh" then
			phonetic = "<span class=\"IPA\">[" .. phonetic .. "]</span>"
		elseif variety_code == "cdo" then
			local cdo_pron = require("Module:cdo-pron")
			phonetic = "<i>" .. phonetic .. "</i>" ..
				(not match(phonetic, "-[^ ]+-[^ ]+-[^ ]+-")
					and " / <span class=\"IPA\"><small>[" .. cdo_pron.sentence(phonetic) .. "]</small></span>"
					or "")
		else
			phonetic = "<i>" .. phonetic .. "</i>"
		end
		phonetic = "<span style=\"color:#404D52\">" .. phonetic .. "</span>"
	end

	local collapse_start, collapse_end, collapse_tag, collapse_border_div, collapse_border_div_end = '', '', '', '', ''
	local simplified_start = '<br>'
	if collapsed then
		collapse_start = '<span class="mw-collapsible mw-collapsed" id="mw-customcollapsible-zhexample">'
		collapse_end = '</span>'
		collapse_tag = '<span class="mw-customtoggle-zhexample" style="color:darkgreen; font-size:x-small;padding-left:10px">[▼ 顯示/隱藏]</span>'
		collapse_border_div = '<div style="border-left: 1px solid #930; border-left-width: 2px; padding-left: 0.8em;">'
		collapse_border_div_end = '</div>'
		simplified_start = '<hr>'
	end

	-- Categorisation is currently disabled, so `cat` stays nil.
	-- fixme: probably categorize only if text contains the actual word
	--   mainspace, with reference:    "[[Category:Chinese terms with quotations]]"
	--   mainspace, without reference: "[[Category:Chinese terms with usage examples]]"
	local cat

	-- Pieces that only exist in some configurations; they stay nil otherwise.
	local tr_tag, ts_tag, trad_tag, simp_tag, audio

	-- indentation, font and identity tags
	-- Long examples, quotations and explicitly requested ones use the
	-- multi-line (<dl>) layout; everything else is rendered on one line.
	local block_layout =
		(variety_code == "cmn" and original_length > 7)
		or (variety_code ~= "cmn" and original_length > 5)
		or reference
		or (match(example, "[,。?!、:; ]") and variety_code == "wuu")
		or (variety_code == "cdo" and original_length > 3)
		-- Parenthesised so that the comparison applies to `inline` itself
		-- ("~=" binds tighter than "or").
		or (inline or "") ~= ""

	if block_layout then
		trad_text = zh_format_start_trad .. trad_text .. zh_format_end
		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end
		if phonetic then
			-- The <dd> opened here is closed at the end of tr_tag.
			phonetic = "<dd>" .. collapse_start .. phonetic
			translation = "<dd>" .. translation .. "</dd>"
			tr_tag = tag_start .. variety_data[3] .. tag_end .. collapse_end .. "</dd>"
		else
			translation = "<dd>" .. translation .. "</dd>"
		end
		if audio_file then
			audio = "<dd>[[File:" .. audio_file .. "]]</dd>"
		end

		trad_tag = collapse_start .. tag_start .. variety_data[1] .. ", <i>[[w:Traditional Chinese|正體]]" ..
			((simp_exist or variety_code == "vi") and "" or " kap [[w:Simplified Chinese|簡體]]") .. "</i>" .. tag_end .. collapse_end .. collapse_tag

		if simp_exist then
			-- The collapsible span opened here is closed at the end of simp_tag.
			simp_text = simplified_start .. collapse_start .. zh_format_start_simp .. simp_text .. zh_format_end
			simp_tag = tag_start .. variety_data[1] .. ", <i>[[w:Simplified Chinese|簡體]]</i>" .. tag_end .. collapse_end
		end

		if reference then
			reference = "<dd>" .. collapse_start .. "<small><i>From:</i> " ..
				(ref_list[reference] and ref_list[reference][2] or reference) .. "</small>" .. collapse_end .. "</dd>"
		end

		return collapse_border_div .. bg_format_start .. "<dl>" .. trad_text .. trad_tag .. (simp_text or "") .. (simp_tag or "") .. (reference or "") ..
			(phonetic and phonetic .. tr_tag or "") .. (audio or "") .. translation .. "</dl>" .. bg_format_end .. (cat or "") .. collapse_border_div_end
	else
		trad_text = zh_format_start_trad .. trad_text .. zh_format_end
		local divider = " ― "

		-- The variety is only labelled when it is not the default one.
		if variety ~= "MSC" then
			ts_tag = tag_start .. variety_data[1] .. tag_end
			tr_tag = tag_start .. variety_data[3] .. tag_end
		end
		if not phonetic then
			translation = "<i>" .. translation .. "</i>"
		end
		if simp_exist then
			simp_text = " / " .. zh_format_start_simp .. simp_text .. zh_format_end
		end
		if audio_file then
			audio = " [[File:" .. audio_file .. "]]"
		end

		return bg_format_start ..
			trad_text .. (simp_text or "") .. (ts_tag or "") .. divider ..
			(phonetic and phonetic .. (tr_tag or "") .. (audio or "") .. divider or "") .. translation .. (literal and " (literally, “" .. literal .. "”)" or "") ..
			bg_format_end .. (cat or "")
	end
end
--- Convert an old-style example (raw wikitext) into the text expected by {{zh-x}}.
--
-- Can be called from Lua with plain strings, or with a frame object. With a
-- frame, the arguments are taken from the frame itself or, when it has no
-- first argument, from its parent: if argument 2 is non-empty the arguments
-- are read as (ref, text, translation), otherwise argument 1 is the text.
--
-- @param text         example wikitext, or a frame object
-- @param translation  optional translation
-- @param ref          optional reference
-- @return a complete "{{zh-x|...}}" call when a translation is available,
--         otherwise just the converted text
-- Raises an error when no example text can be found.
function export.migrate(text, translation, ref)
	if type(text) == "table" then
		if not text.args or not text.args[1] then
			text = text:getParent()
		end
		if text.args[2] and text.args[2] ~= '' then
			ref = text.args[1]
			translation = text.args[3]
			text = text.args[2]
		else
			text = text.args[1]
		end
	end

	if type(text) ~= "string" then
		error("No example text to migrate.")
	end

	-- Ordered list of { pattern, replacement } pairs; the order matters.
	local conversions = {
		{ '^[%*#: \n]+', '' },     -- leading list markup and whitespace
		{ '[ \n]+$', '' },         -- trailing whitespace
		{ ' +', ' ' },             -- collapse runs of spaces
		{ '\n+', '<br>' },         -- remaining line breaks
		{ '|', '\\' },             -- link pipes become "\" (a pipe would end the template argument)
		{ "'''%[%[", ' ' },        -- bolded links become space-separated words
		{ "%]%]'''", ' ' },
		{ '%]%]%[%[', ' ' },       -- adjacent links become separate words
		{ '%]%]', '' },
		{ '%[%[', '' },
		{ "'''", '' },             -- remaining bold markup
		-- ASCII punctuation becomes its full-width counterpart. (With an
		-- ASCII replacement these three steps would change nothing.)
		{ ',', ',' },
		{ '!', '!' },
		{ '%?', '?' },
	}
	for _, conversion in ipairs(conversions) do
		text = text:gsub(conversion[1], conversion[2])
	end

	if translation then
		if ref and ref ~= '' then
			return '{{zh-x|' .. text .. '|' .. translation .. '|ref=' .. ref .. '}}'
		else
			return '{{zh-x|' .. text .. '|' .. translation .. '}}'
		end
	else
		return text
	end
end
return export