Module:Tibt-sortkey
Appearance
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
local m_str_utils = require("Module:string utilities")
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local match = m_str_utils.match
local sub = m_str_utils.sub
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char
local Tibt = require("Module:Tibt-common")
-- Four marker characters built from code points 0xE000-0xE003 via `u`
-- (`m_str_utils.char`). sortRadical uses them as trailing markers on the
-- radical: `b` replaces the "༹" mark, `c` and `d` tag ཫ / ཬ after they are
-- rewritten to ཀ / ར (without and with `b` respectively), and `a` is appended
-- when the radical does not already end in one of `b`..`d`.
local a, b, c, d = u(0xE000), u(0xE001), u(0xE002), u(0xE003)
local letters = {
{"ཀ", "ྐ"}, {"ཀ༹", "ྐ༹", ""}, {"ཫ", "ཫ"}, {"ཫ༹", "ཫ༹", ""}, {"ཁ", "ྑ"}, {"ཁ༹", "ྑ༹", ""}, {"ག", "ྒ"}, {"ག༹", "ྒ༹", ""}, {"ང", "ྔ"}, {"ང༹", "ྔ༹", ""}, {"ཅ", "ྕ"}, {"ཆ", "ྖ"}, {"ཇ", "ྗ"}, {"ཉ", "ྙ"}, {"ཉ༹", "ྙ༹", ""}, {"ཊ", "ྚ"}, {"ཊ༹", "ྚ༹", ""}, {"ཋ", "ྛ"}, {"ཋ༹", "ྛ༹", ""}, {"ཌ", "ྜ"}, {"ཌ༹", "ྜ༹", ""}, {"ཎ", "ྞ"}, {"ཎ༹", "ྞ༹", ""}, {"ཏ", "ྟ"}, {"ཏ༹", "ྟ༹", ""}, {"ཐ", "ྠ"}, {"ཐ༹", "ྠ༹", ""}, {"ད", "ྡ"}, {"ད༹", "ྡ༹", ""}, {"ན", "ྣ"}, {"ན༹", "ྣ༹", ""}, {"པ", "ྤ"}, {"པ༹", "ྤ༹", ""}, {"ཕ", "ྥ"}, {"ཕ༹", "ྥ༹", ""}, {"བ", "ྦ"}, {"བ༹", "ྦ༹", ""}, {"མ", "ྨ"}, {"མ༹", "ྨ༹", ""}, {"ཙ", "ྩ"}, {"ཚ", "ྪ"}, {"ཛ", "ྫ"}, {"ཝ", "ྭ"}, {"ཝ༹", "ྭ༹", ""}, {"ཞ", "ྮ"}, {"ཞ༹", "ྮ༹", ""}, {"ཟ", "ྯ"}, {"ཟ༹", "ྯ༹", ""}, {"འ", "ྰ"}, {"འ༹", "ྰ༹", ""}, {"ཡ", "ྱ"}, {"ཡ༹", "ྱ༹", ""}, {"ར", "ྲ"}, {"ར༹", "ྲ༹", ""}, {"ཬ", "ཬ"}, {"ཬ༹", "ཬ༹", ""}, {"ལ", "ླ"}, {"ལ༹", "ླ༹", ""}, {"ཤ", "ྴ"}, {"ཤ༹", "ྴ༹", ""}, {"ཥ", "ྵ"}, {"ཥ༹", "ྵ༹", ""}, {"ས", "ྶ"}, {"ས༹", "ྶ༹", ""}, {"ཧ", "ྷ"}, {"ཧ༹", "ྷ༹", ""}, {"ཨ", "ྸ"}, {"ཨ༹", "ྸ༹", ""}, {"ཱ", "ཱ"}, {"ི", "ི"}, {u(0xF73), "ཱི"}, {"ུ", "ུ"}, {u(0xF75), "ཱུ"}, {u(0xF76), "ྲྀ"}, {u(0xF77), "ྲཱྀ"}, {u(0xF78), "ླྀ"}, {u(0xF79), "ླཱྀ"}, {"ེ", "ེ"}, {"ཻ", "ཻ"}, {"ོ", "ོ"}, {"ཽ", "ཽ"}
}
--- Split a syllable around its main stack.
-- `mainStack` is spliced into the patterns as-is (it is not escaped), so it is
-- treated as a pattern rather than as plain text.
-- @param text the syllable to split
-- @param mainStack the main stack expected to occur inside `text`
-- @return the text captured before the main stack
-- @return the text captured after the main stack
local function findAffixes(text, mainStack)
	local before = gsub(text, "(.*)" .. mainStack .. ".*", "%1")
	local after = gsub(text, ".*" .. mainStack .. "(.*)", "%1")
	return before, after
end
--- Separate the vowel signs from a main stack.
-- @param mainStack the stack to inspect
-- @return `mainStack` with every run of vowel signs removed
-- @return the first run of vowel signs found, or "" when there is none
local function findVowel(mainStack)
	local vowelPattern = "[ཱ-ཽྀ]+"
	local stripped = gsub(mainStack, vowelPattern, "")
	local vowel = match(mainStack, vowelPattern)
	if not vowel then
		vowel = ""
	end
	return stripped, vowel
end
--- Break a (vowel-less) main stack into superjoined letter, radical and
-- subjoined remainder.
-- A leading ར, ལ or ས counts as superjoined only when followed by one of the
-- subjoined letters listed for it; that reading is then dropped again for the
-- ར/ས + letter + ྱ (and ས + letter + ྲ) shapes tested below.
-- @param mainStack the main stack with its vowel already removed
-- @return the superjoined letter, or "" if there is none
-- @return the radical, converted from `letters[i][2]` form to `letters[i][1]` form
-- @return whatever follows the radical in the stack
local function mainStackParts(mainStack)
	local superjoined = match(mainStack, "(ར)[ྐྒྔྗྙྟྡྣྦྨྩྫ]")
	if not superjoined then
		superjoined = match(mainStack, "(ལ)[ྐྒྔྕྗྟྡྤྦྷ]")
	end
	if not superjoined then
		superjoined = match(mainStack, "(ས)[ྐྒྔྙྟྡྣྤྦྨྩ]")
	end
	if not superjoined then
		superjoined = ""
	end

	-- In these shapes the first letter is not treated as superjoined.
	local notSuperjoined = false
	if superjoined == "ར" then
		notSuperjoined = match(mainStack, "ར[^ྐྒྨ]ྱ")
	elseif superjoined == "ས" then
		notSuperjoined = match(mainStack, "ས[^ྐྒྤྦྨ]ྱ") or match(mainStack, "ས[^ྐྒྣྤྦྨ]ྲ")
	end
	if notSuperjoined then
		superjoined = ""
	end

	local radical = match(mainStack, "^" .. superjoined .. "(.)")
	-- The remainder is located with the radical as it appears in the stack,
	-- so this must happen before the radical is converted below.
	local subjoined = match(mainStack, "^" .. superjoined .. radical .. "(.*)")

	for index = 1, #letters do
		local letter = letters[index]
		radical = gsub(radical, letter[2], letter[1])
	end

	return superjoined, radical, subjoined
end
--- Build the leading part of a syllable's sort key from its radical.
-- Steps: expand any `letters[i][3]` placeholder back to `letters[i][1]`,
-- turn the "༹" mark into marker `b`, rewrite ཫ / ཬ to ཀ / ར tagged with `c`
-- (or `d` when `b` followed), and finally append marker `a` if the result
-- does not already end in one of `b`..`d`.
-- @param radical the radical as returned by mainStackParts
-- @return the radical followed by its marker
local function sortRadical(radical)
	for _, letter in ipairs(letters) do
		local placeholder = letter[3]
		if placeholder then
			radical = gsub(radical, placeholder, letter[1])
		end
	end

	radical = gsub(radical, "༹", b)

	-- Two-character sequences first; the two rewrites cannot affect each other.
	radical = gsub(radical, "ཫ" .. b, "ཀ" .. d)
	radical = gsub(radical, "ཬ" .. b, "ར" .. d)

	-- Then the bare letters that are still left.
	radical = gsub(radical, ".", {
		["ཫ"] = "ཀ" .. c,
		["ཬ"] = "ར" .. c,
	})

	local marked = gsub(radical, "([^" .. b .. "-" .. d .. "])$", "%1" .. a)
	return marked
end
--- Pack a list of letter indices into a shorter string.
-- Values are consumed in pairs; each pair (hi, lo) becomes the single
-- character with code point 0x4E00 + hi * (#letters + 1) + lo, i.e. one
-- base-(#letters + 1)^2 digit (base 6724 for the current table).
-- Declared `local` like the other helpers so it does not leak into the
-- global environment.
-- @param value sequence of numbers (or numeric strings). NOTE: if its length
--   is odd, a 0 is inserted at the front of the table in place.
-- @return the packed string
local function baseConvert(value)
	local base = #letters + 1
	-- Left-pad to an even length so that every value has a partner.
	if #value % 2 ~= 0 then
		table.insert(value, 1, 0)
	end
	local newValue = {}
	for i = 1, #value / 2 do
		newValue[i] = u(0x4E00 + (value[(i * 2) - 1] * base) + value[i * 2])
	end
	return table.concat(newValue)
end
--- Turn one part of a syllable into a fixed-width list of letter indices.
-- Each character of `part` is looked up in `letters` (any of an entry's up to
-- three forms matches) and replaced by that entry's index; positions past the
-- end of `part` and characters not found become 0.
-- @param part the string to encode
-- @param partType "superjoined", "prefix" or "vowel" (width 1),
--   "subjoined" (width 9) or "suffix" (width 6)
-- @return a table with exactly that many entries
local function sortValue(part, partType)
	local widths = {
		superjoined = 1,
		prefix = 1,
		vowel = 1,
		subjoined = 9,
		suffix = 6,
	}
	local length = widths[partType]

	local values = {}
	for pos = 1, length do
		local slot = ""
		if len(part) >= pos then
			slot = sub(part, pos, pos)
		end

		-- No early exit: if several entries match, the last one wins.
		local found
		for index, letter in ipairs(letters) do
			if slot == letter[1] or slot == letter[2] or slot == letter[3] then
				found = index
			end
		end

		if found then
			values[pos] = found
		elseif slot == "" or match(slot, "[^0-9]") then
			values[pos] = 0
		else
			-- An unmatched character made only of 0-9 is kept as it is.
			values[pos] = slot
		end
	end
	return values
end
-- Apply the letter-form rewrites used by makeSortKey to one string: entries
-- that have a third form get their first and second forms replaced by it,
-- then entries from index 42 onwards get their second form replaced by their
-- first.
-- NOTE(review): 42 is a hard-coded index into `letters`; confirm it still
-- points at the intended entry whenever the table is edited.
local function unifyLetterForms(str)
	for _, letter in ipairs(letters) do
		if letter[3] then
			str = gsub(str, letter[1], letter[3])
			str = gsub(str, letter[2], letter[3])
		end
	end
	for index = 42, #letters do
		str = gsub(str, letters[index][2], letters[index][1])
	end
	return str
end

--- Generate a sort key for text in the Tibetan script.
-- @param text the text to make a key for
-- @param lang language code (required; an error is raised without it)
-- @param sc script code; when omitted it is detected from `text`
-- @return `text` unchanged if the script is not "Tibt", otherwise the
--   concatenated per-syllable keys passed through `toNFC`
function export.makeSortKey(text, lang, sc)
	if not lang then
		error("Language code required.")
	end
	local langObj = require("Module:languages").getByCode(lang)

	if not sc then
		sc = langObj:findBestScript(text):getCode()
	end
	if sc ~= "Tibt" then
		return text
	end

	text = (langObj:makeEntryName(text))
	text = gsub(text, ".", {
		["ཪ"] = "ར", ["ྺ"] = "ྭ", ["ྻ"] = "ྱ", ["ྼ"] = "ྲ"
	})

	local keys = {}
	for word in Tibt.getWords(text) do
		for syllable in Tibt.getSyllables(word) do
			-- The main stack is located on the syllable as given; only
			-- afterwards are both strings rewritten.
			local stack = unifyLetterForms(Tibt.findMainStack(syllable, lang))
			local unified = unifyLetterForms(syllable)

			local prefix, suffix = findAffixes(unified, stack)
			local vowel
			stack, vowel = findVowel(stack)
			local superjoined, radical, subjoined = mainStackParts(stack)

			local headAndPrefix = {
				table.concat(sortValue(superjoined, "superjoined")),
				table.concat(sortValue(prefix, "prefix")),
			}
			local subjoinedAndVowel = sortValue(subjoined, "subjoined")
			table.insert(subjoinedAndVowel, table.concat(sortValue(vowel, "vowel")))
			local suffixValues = sortValue(suffix, "suffix")

			keys[#keys + 1] = sortRadical(radical)
				.. baseConvert(headAndPrefix)
				.. baseConvert(subjoinedAndVowel)
				.. baseConvert(suffixValues)
		end
	end

	text = table.concat(keys)

	-- Remove the two separator characters, but only when one of them has
	-- another character next to it.
	local separators = "[་༌]"
	if match(text, "." .. separators) or match(text, separators .. ".") then
		text = gsub(text, separators, "")
	end

	return toNFC(text)
end
-- Language object for code "bo", fetched once when the module loads and
-- reused by `tag`.
local bo = require("Module:languages").getByCode("bo")

--- Wrap `text` using Module:script utilities' `tag_text` with the "bo"
-- language object.
local function tag(text)
	local scriptUtilities = require("Module:script utilities")
	return scriptUtilities.tag_text(text, bo)
end
--- Sort the positional arguments by their Tibetan sort key and return them
-- as a bulleted wikitext list.
-- @param frame the frame object; its positional args are the terms to sort
-- @return one "\n* " item per term, in sorted order, each wrapped by `tag`
function export.showSorting(frame)
	local terms = {}
	for _, term in ipairs(frame.args) do
		table.insert(terms, term)
	end

	-- The comparator is called many times per term, so cache the keys.
	local makeSortKey = require("Module:memoize")(export.makeSortKey)
	local function comp(term1, term2)
		return makeSortKey(term1, "bo", "Tibt") < makeSortKey(term2, "bo", "Tibt")
	end
	table.sort(terms, comp)

	-- The previous version also computed a sort key here on every iteration
	-- and discarded it (and passed a script object where makeSortKey expects
	-- the code "Tibt"); that dead work has been removed.
	for i, term in ipairs(terms) do
		terms[i] = "\n* " .. tag(term)
	end
	return table.concat(terms)
end
return export
Categories:
- Sortkey-generating modules by script
- Tibetan script modules
- Sortkey-generating modules
- Tibetan modules
- Nyenkha modules
- Chocangaca modules
- Lunanakha modules
- Zhang-Zhung modules
- Sikkimese modules
- Dakpa modules
- Old Tibetan modules
- Ladakhi modules
- Sanskrit modules
- Zangskari modules
- Adap modules
- Balti modules
- Kurtöp modules
- Gahri modules
- Dzongkha modules
- Sherpa modules
- Classical Tibetan modules
- Idu modules
- Khengkha modules
- Kalaktang Monpa modules
- Brokkat modules
- Chali modules
- Brokpake modules
- Dzala modules
- Manangba modules
- Bumthangkha modules
- Tawang Monpa modules
- Tshangla modules
- Kutang Ghale modules
- Olekha modules
- Layakha modules
- Changthang modules
- Bokar modules
- Templates and modules needing documentation