Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Jump to content

Module:el-translit

From Wiktionary, the free dictionary

This module transliterates Greek-language text per WT:EL TR. It is also used to transliterate Cappadocian Greek, Calabrian Greek, Italiot Greek, Pontic Greek, and Tsakonian. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:el-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

local m_str_utils = require("Module:string utilities")

-- Local aliases for the helpers taken from Module:string utilities.
local gsub = m_str_utils.gsub
local sub = m_str_utils.sub
-- NOTE(review): presumably builds a string from a code point; confirm
-- against Module:string utilities.
local U = m_str_utils.char

-- Characters referenced by code point rather than typed literally.
local acute = U(0x301) -- U+0301, combining acute accent
local diaeresis = U(0x308) -- U+0308, combining diaeresis
local erotimatiko = U(0x37E) -- U+037E, Greek question mark

-- Module table; export.tr is attached to it and it is returned at the end
-- of the file.
local export = {}

-- Single-character transliteration map. export.tr applies it to every
-- character as its last step, after the digraph rules have run; characters
-- without an entry are left unchanged.
-- Both cases of the letters with dialytika are listed (ϊ/Ϊ, ϋ/Ϋ) so that
-- capitalised text does not keep stray Greek letters.
local tt = {
	["α"] = "a",  ["ά"] = "á",  ["β"] = "v",  ["γ"] = "g",  ["δ"] = "d",
	["ε"] = "e",  ["έ"] = "é",  ["ζ"] = "z",  ["η"] = "i",  ["ή"] = "í",
	["θ"] = "th", ["ι"] = "i",  ["ί"] = "í",  ["ϊ"] = "ï",  ["ΐ"] = "ḯ",
	["κ"] = "k",  ["λ"] = "l",  ["μ"] = "m",  ["ν"] = "n",  ["ξ"] = "x",
	["ο"] = "o",  ["ό"] = "ó",  ["π"] = "p",  ["ρ"] = "r",  ["σ"] = "s",
	["ς"] = "s",  ["τ"] = "t",  ["υ"] = "y",  ["ύ"] = "ý",  ["ϋ"] = "ÿ",
	["ΰ"] = "ÿ́",  ["φ"] = "f",  ["χ"] = "ch", ["ψ"] = "ps", ["ω"] = "o",
	["ώ"] = "ó",
	["Α"] = "A",  ["Ά"] = "Á",  ["Β"] = "V",  ["Γ"] = "G",  ["Δ"] = "D",
	["Ε"] = "E",  ["Έ"] = "É",  ["Ζ"] = "Z",  ["Η"] = "I",  ["Ή"] = "Í",
	["Θ"] = "Th", ["Ι"] = "I",  ["Ί"] = "Í",  ["Κ"] = "K",  ["Λ"] = "L",
	["Μ"] = "M",  ["Ν"] = "N",  ["Ξ"] = "X",  ["Ο"] = "O",  ["Ό"] = "Ó",
	["Π"] = "P",  ["Ρ"] = "R",  ["Σ"] = "S",  ["Τ"] = "T",  ["Υ"] = "Y",
	["Ύ"] = "Ý",  ["Φ"] = "F",  ["Χ"] = "Ch", ["Ψ"] = "Ps", ["Ω"] = "O",
	["Ώ"] = "Ó",
	-- capitals with dialytika, mirroring the lowercase ϊ and ϋ entries
	["Ϊ"] = "Ï",  ["Ϋ"] = "Ÿ",
-- punctuation
	["·"] = ";",
}

-- Letters before which the υ of αυ/ευ/ηυ is written "f" instead of "v".
local voiceless = "θκξπσςτφχψ"

-- Reports whether the single character `ch` is whitespace or punctuation.
-- The check uses the byte-oriented string library, so a multibyte character
-- only counts if one of its bytes falls in %s or %p.
local function is_separator(ch)
	return ch:find("[%s%p]") ~= nil
end

--- Transliterates any words or phrases.
-- @param text  the text to transliterate
-- @param lang  language code; not used by this function
-- @param sc    script code; not used by this function
-- @return the transliterated text
function export.tr(text, lang, sc)

	text = gsub(gsub(text, "χ̌", "š"), "Χ̌", "Š") -- dialectal
	text = gsub(gsub(text, "ά̤", "ä́"), "Ά̤", "Ä́") -- dialectal
	text = gsub(gsub(text, "α̤", "ä"), "Α̤", "Ä") -- dialectal
	text = gsub(gsub(text, "ό̤", "ö́"), "Ό̤", "Ö́") -- dialectal
	text = gsub(gsub(text, "ο̤", "ö"), "Ο̤", "Ö") -- dialectal

	-- ";" and the Greek question mark become "?" unless the preceding
	-- character is an ASCII letter or digit.
	text = gsub(text, "([^A-Za-z0-9])[;" .. erotimatiko .. "]", "%1?")

	-- αυ, ευ, ηυ: the υ becomes "f" before a voiceless consonant or when
	-- nothing but whitespace/punctuation follows, and "v" otherwise.
	-- While gsub runs, `text` still holds the string being scanned, so the
	-- captured position can be used to look ahead in it.
	text = gsub(text, "([αεηΑΕΗ])([υύ])()",
				function (vowel, upsilon, position)
					-- Skip whitespace and punctuation; `following` ends up
					-- as the next other character, or "" at the end.
					local following = sub(text, position, position)
					while following ~= "" and is_separator(following) do
						position = position + 1
						following = sub(text, position, position)
					end
					local devoiced = following == ""
						or voiceless:find(following, 1, true)
					return tt[vowel]
						.. (upsilon == "ύ" and acute or "")
						.. (devoiced and "f" or "v")
				end)

	-- η/ή after α, ε, ο, ω is written as "i" with a diaeresis (plus an
	-- acute accent for ή).
	text = gsub(text, "([αεοωΑΕΟΩ])([ηή])",
				function (vowel, ita)
					local marks = diaeresis
					if ita == "ή" then
						marks = marks .. acute
					end
					return tt[vowel] .. "i" .. marks
				end)

	text = gsub(text, "[ωΩ][ιί]",
				{["ωι"] = "oï", ["ωί"] = "oḯ",
				 ["Ωι"] = "Oï", ["Ωί"] = "Oḯ"})

	text = gsub(text, "[οΟ][υύ]",
				{["ου"] = "ou", ["ού"] = "oú",
				 ["Ου"] = "Ou", ["Ού"] = "Oú"})

	-- Word-initial μπ becomes "b". "Word-initial" means nothing precedes
	-- it, or the preceding character is whitespace or punctuation; this
	-- covers the space and hyphen cases as well as newlines, brackets,
	-- quotes and apostrophes. Elsewhere the callback returns nothing, which
	-- leaves the match untouched for the per-character pass ("mp").
	text = gsub(text, "(.?)([μΜ])π",
				function (before, mi)
					if before == "" or is_separator(before) then
						return before .. (mi == "Μ" and "B" or "b")
					end
				end)

	-- γ before γ, ξ or χ is written "n".
	text = gsub(text, "γ([γξχ])", "n%1")

	-- Everything that is still Greek is mapped one character at a time.
	text = gsub(text, ".", tt)

	return text
end

return export