--[==[
Page header captured together with the module source (not part of the module code):

Location via proxy:   [ UP ]
[Report a bug]   [Manage cookies]
Hopp til innhald

Modul:languages

Frå Wiktionary - den frie ordboka

Dokumentasjon for modulen kan opprettast på Modul:languages/dok
]==]

-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Temporarily convert various formatting characters to PUA to prevent them from being disrupted by the substitution process.
-- The patterns added in this function capture the whole match first, followed by the inner part(s) to hide; patterns from
-- Module:languages/data/patterns are assumed to follow the same shape (not visible here - TODO confirm).
-- Every inner capture is stored in subbedChars and replaced by the private-use character U+E000 plus its index in subbedChars.
-- Returns the modified text and the (mutated) subbedChars table.
-- TODO: Handle arbitrary number of capture groups.
local function doTempSubstitutions(text, subbedChars, keepCarets)
	-- Cloning the table locally is much faster.
	local patterns = mw.clone(require("Module:languages/data/patterns"))
	if keepCarets then
		-- Also protect carets. The patterns are tried in this order: doubled backslash + caret, single backslash + caret, bare caret.
		table.insert(patterns, "((\\\\)%^)")
		table.insert(patterns, "((\\)%^)")
		table.insert(patterns, "((%^))")
	end
	-- Include any spaces preceding inserted PUA characters, to prevent them from being accidentally trimmed.
	table.insert(patterns, "((%s)[\238-\239][\128-\163][\128-\191])")
	-- i is the number of substitutions already recorded, so new placeholders continue after any existing ones.
	local i, pe = #subbedChars, require("Module:utilities").pattern_escape
	for _, pattern in ipairs(patterns) do
		for m1, m2, m3, m4 in text:gmatch(pattern) do
			-- m1 is the whole match; m[2] onwards are the inner captures to hide.
			local m, m1New = {m1, m2, m3, m4}, m1
			for j = 2, #m do
				subbedChars[i+j-1] = m[j]
				-- Swap the first occurrence of the capture inside the match for its placeholder character.
				m1New = m1New:gsub(pe(m[j]), mw.ustring.char(0xE000+i+j-1), 1)
			end
			-- Replace the first occurrence of the original match in the text with its placeholder version.
			text = text:gsub(pe(m1), pe(m1New), 1)
			-- Advance the counter by the number of inner captures just recorded.
			i = i + #m - 1
		end
	end
	return text, subbedChars
end

-- Put back the formatting that was temporarily swapped out: the placeholder character U+E000+n is replaced by the
-- n-th entry of subbedChars (pattern-escaped so it is inserted literally).
local function undoTempSubstitutions(text, subbedChars)
	local escape = require("Module:utilities").pattern_escape
	local restored = text
	for index = 1, #subbedChars do
		local placeholder = mw.ustring.char(0xE000 + index)
		local original = escape(subbedChars[index])
		restored = restored:gsub(placeholder, original)
	end
	return restored
end

-- Decode HTML entities. The decoder module is only loaded when the text contains something shaped like an entity ("&...;").
local function noEntities(text)
	local hasEntity = text:find("&[^;]+;")
	if not hasEntity then
		return text
	end
	return require("Module:utilities").get_entities(text)
end

-- Decode HTML entities, unless the raw text is itself an unsupported title, in which case the raw text is returned unchanged.
-- The decoding is done first so that the unsupported title list is only loaded when decoding actually changed something.
local function checkNoEntities(text)
	local decoded = noEntities(text)
	if decoded == text then
		return decoded
	end
	if mw.loadData("Module:links/data").unsupported_titles[text] then
		return text
	end
	return decoded
end

-- Return sc if it is a usable script object (a table tagged "script object" whose code is not "None");
-- otherwise ask the language object (self) to find the best script for the text.
local function checkScript(text, self, sc)
	local usable = type(sc) == "table" and sc._type == "script object" and sc:getCode() ~= "None"
	if usable then
		return sc
	end
	return self:findBestScript(text)
end

-- Apply the script's discouraged-sequence fixes to the text, then its fixed NFD normalization.
local function normalize(text, sc)
	local fixed = sc:fixDiscouragedSequences(text)
	return sc:toFixedNFD(fixed)
end

-- Convert risky characters to HTML entities, which minimizes interference once returned (e.g. for "sms:a", "<!-- -->" etc.).
-- Quotes are only encoded inside matches of the formatting patterns; the fixed set of other characters is encoded everywhere.
local function escapeRiskyChars(text)
	local function encodeQuotes(match)
		return mw.text.encode(match, "\"'")
	end
	local formattingPatterns = mw.clone(require("Module:languages/data/patterns"))
	for _, formattingPattern in ipairs(formattingPatterns) do
		text = text:gsub(formattingPattern, encodeQuotes)
	end
	return mw.text.encode(text, "#%%&+/:<=>@[\\%]_{|}")
end

-- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them.
-- Parameters:
--   text: the string to process.
--   subbedChars: table of temporarily substituted strings, or nil if no temporary substitution is in use.
--   keepCarets: passed through to doTempSubstitutions for the second substitution round.
--   self, sc, substitution_data, function_name: passed unchanged to Module:languages/doSubstitutions.
-- Returns: text (nil if a section's substitution returned nothing), fail, cats (duplicates removed), subbedChars.
local function iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, substitution_data, function_name)
	local pe = require("Module:utilities").pattern_escape
	local fail, cats = nil, {}
	-- Build the private-use range (U+E000-U+F8FF) explicitly instead of relying on literal private-use characters in
	-- the source, which are invisible and are silently lost when the module text is copied (leaving a bare "[-]").
	local puaClass = "[" .. mw.ustring.char(0xE000) .. "-" .. mw.ustring.char(0xF8FF) .. "]"
	local sections = mw.text.split(text, puaClass)
	-- Formatting patterns are loaded at most once per call, and only if a second substitution round is needed.
	local formattingPatterns
	for _, section in ipairs(sections) do
		-- Don't bother processing empty strings or whitespace (which may also not be handled well by dedicated modules).
		if section:gsub("%s", "") ~= "" then
			local sub, sub_fail, sub_cats = require("Module:languages/doSubstitutions")(section, self, sc, substitution_data, function_name)
			-- Second round of temporary substitutions, in case any formatting was added by the main substitution process. However, don't do this if the section contains formatting already (as it would have had to have been escaped to reach this stage, and therefore should be given as raw text).
			if sub and subbedChars then
				formattingPatterns = formattingPatterns or mw.clone(require("Module:languages/data/patterns"))
				local noSub = false
				for _, pattern in ipairs(formattingPatterns) do
					if section:match(pattern) then
						noSub = true
						break
					end
				end
				if not noSub then
					sub, subbedChars = doTempSubstitutions(sub, subbedChars, keepCarets)
				end
			end
			-- A missing result or an explicit failure aborts the whole run; the partial result replaces the text.
			if (not sub) or sub_fail then
				text = sub
				fail = sub_fail
				cats = sub_cats or {}
				break
			end
			-- Swap the first occurrence of the original section for its substituted form.
			text = text:gsub(pe(section), pe(sub), 1)
			if type(sub_cats) == "table" then
				for _, cat in ipairs(sub_cats) do
					table.insert(cats, cat)
				end
			end
		end
	end
	
	-- Trim end (ignoring any final formatting characters).
	if text and text:match("[\238-\239][\128-\163][\128-\191]$") then
		text = mw.ustring.gsub(text, "(.*[^%s]+)(%s+" .. puaClass .. ")$", function(c1, c2) return c1 .. mw.text.trim(c2) end)
	elseif text then
		text = text:gsub("%s+$", "")
	end
	
	-- Remove duplicate categories.
	if #cats > 1 then
		cats = require("Module:table").removeDuplicates(cats)
	end
	
	return text, fail, cats, subbedChars
end

-- Process carets, honouring backslash escapes. Escaped sequences are parked behind two placeholder characters while the
-- caret pattern is applied, then restored. With no pattern/replacement given, unescaped carets are simply removed.
-- Like the final gsub it ends with, this returns the resulting string followed by that gsub's match count.
local function processCarets(text, pattern, repl)
	local escapedBackslash, escapedCaret = "\243\176\128\128", "\243\176\128\129"
	local caretPattern = pattern or "%^"
	local replacement = repl or ""
	local result = text:gsub("\\\\^", escapedBackslash .. "^")
	result = result:gsub("\\^", escapedCaret)
	result = result:gsub(caretPattern, replacement)
	result = result:gsub(escapedBackslash, "\\")
	return result:gsub(escapedCaret, "^")
end

-- Strip (unescaped) carets from the text, but only for scripts that have no capitalization and are transliterated,
-- where carets are used to mark capitals in transliterations. Otherwise the text is returned untouched.
local function removeCarets(text, sc)
	if sc:hasCapitalization() then
		return text
	end
	if not sc:isTransliterated() then
		return text
	end
	if not text:find("%^") then
		return text
	end
	return processCarets(text)
end

-- Method table for language objects; export.makeObject sets it as the metatable of every object it creates.
local Language = {}

-- Returns the code stored on the object when it was created.
function Language:getCode()
	local code = self._code
	return code
end

-- Returns the canonical name, which is the first positional field of the raw language data.
function Language:getCanonicalName()
	local data = self._rawData
	return data[1]
end

-- Returns the form of the name used for display; for languages this is simply the canonical name.
function Language:getDisplayForm()
	return self.getCanonicalName(self)
end

-- Loads the extra data, then delegates to Module:language-like to produce the list of other names.
function Language:getOtherNames(onlyOtherNames)
	self:loadInExtraData()
	local languageLike = require("Module:language-like")
	return languageLike.getOtherNames(self, onlyOtherNames)
end

-- Returns the aliases from the extra data, or a new empty table if none are recorded.
function Language:getAliases()
	self:loadInExtraData()
	local aliases = self._extraData.aliases
	if aliases then
		return aliases
	end
	return {}
end

-- Loads the extra data, then delegates to Module:language-like to produce the list of varieties.
function Language:getVarieties(flatten)
	self:loadInExtraData()
	local languageLike = require("Module:language-like")
	return languageLike.getVarieties(self, flatten)
end

-- Returns the raw "type" field of the language data, defaulting to "regular" when it is not set.
function Language:getType()
	local rawType = self._rawData.type
	if rawType then
		return rawType
	end
	return "regular"
end

--[==[Given a list of types as strings, returns true if the language has all of them. Possible types are explained in [[Module:languages/data/2]] and [[Module:etymology languages/data]].

Every language object has the type "language", plus each comma-separated entry of the raw `type` field (treated as "regular" when the field is absent, matching `getType`).]==]
function Language:hasType(...)
	-- The parsed set is cached under its own key: self._type already holds the object-kind tag
	-- ("language object") assigned by export.makeObject, so it must not be reused as the cache.
	local typeSet = self._typeSet
	if not typeSet then
		typeSet = {language = true}
		local rawType = self._rawData.type or "regular"
		for _, langType in ipairs(mw.text.split(rawType, "%s*,%s*")) do
			typeSet[langType] = true
		end
		self._typeSet = typeSet
	end
	for _, wanted in ipairs{...} do
		if not typeSet[wanted] then
			return false
		end
	end
	return true
end

-- Returns the list of Wikimedia language codes (cached on the object). The raw field may be a table or a
-- comma-separated string; when it is neither, the language's own code is used.
function Language:getWikimediaLanguageCodes()
	if not self._wikimediaLanguageCodes then
		local raw = self._rawData.wikimedia_codes
		local codes
		if type(raw) == "table" then
			codes = raw
		elseif type(raw) == "string" then
			codes = mw.text.split(raw, "%s*,%s*")
		else
			codes = {self:getCode()}
		end
		self._wikimediaLanguageCodes = codes
	end
	return self._wikimediaLanguageCodes
end

-- Returns the Wikimedia language objects for this language's Wikimedia codes (built once, then cached on the object).
function Language:getWikimediaLanguages()
	local cached = self._wikimediaLanguageObjects
	if cached then
		return cached
	end
	
	local m_wikimedia_languages = require("Module:wikimedia languages")
	local objects = {}
	self._wikimediaLanguageObjects = objects
	for _, wlangcode in ipairs(self:getWikimediaLanguageCodes()) do
		table.insert(objects, m_wikimedia_languages.getByCode(wlangcode))
	end
	
	return objects
end

-- Returns the title of the Wikipedia article for the language. Sources, in order of preference: the explicit
-- wikipedia_article field, a previously cached value, the 'enwiki' sitelink of the Wikidata item (when mw.wikibase is
-- available), and finally the category name with "Creole language" shortened to "Creole". Computed values are cached.
function Language:getWikipediaArticle()
	local explicit = self._rawData.wikipedia_article
	if explicit then
		return explicit
	end
	local cached = self._wikipedia_article
	if cached then
		return cached
	end
	
	local article
	if self:getWikidataItem() and mw.wikibase then
		article = mw.wikibase.sitelink(self:getWikidataItem(), 'enwiki')
	end
	if not article then
		article = self:getCategoryName():gsub("Creole language", "Creole")
	end
	self._wikipedia_article = article
	
	return article
end

-- Builds a piped wikilink to the language's Wikipedia article, displayed as the canonical name.
function Language:makeWikipediaLink()
	local article = self:getWikipediaArticle()
	local label = self:getCanonicalName()
	return "[[w:" .. article .. "|" .. label .. "]]"
end

-- Returns the Wikidata item from the second positional data field. A numeric value is turned into a "Q..." id;
-- anything else is returned as stored.
function Language:getWikidataItem()
	local item = self._rawData[2]
	if type(item) ~= "number" then
		return item
	end
	return "Q" .. item
end

-- Returns the list of script codes (cached on the object). The fourth positional data field may be a table or a
-- comma-separated string; when it is neither, the list is {"None"}.
function Language:getScriptCodes()
	if not self._scriptCodes then
		local raw = self._rawData[4]
		local codes
		if type(raw) == "table" then
			codes = raw
		elseif type(raw) == "string" then
			codes = mw.text.split(raw, "%s*,%s*")
		else
			codes = {"None"}
		end
		self._scriptCodes = codes
	end
	return self._scriptCodes
end

-- Returns the script objects for the language's script codes (cached on the object). When the first script code is
-- "All", the loaded contents of Module:scripts/data are stored and returned instead of a list built via getByCode.
function Language:getScripts()
	if self._scriptObjects then
		return self._scriptObjects
	end
	
	local m_scripts = require("Module:scripts")
	self._scriptObjects = {}
	
	if self:getScriptCodes()[1] ~= "All" then
		for _, code in ipairs(self:getScriptCodes()) do
			table.insert(self._scriptObjects, m_scripts.getByCode(code))
		end
	else
		self._scriptObjects = mw.loadData("Module:scripts/data")
	end
	
	return self._scriptObjects
end

-- Find the best script to use, based on the characters of a string. If forceDetect is set, run the detection algorithm even if there's only one possible script; in that case, if the text isn't in the script, the return value will be None.
-- Missing, empty or "-" text always yields the None script; languages whose script codes are just "All" are handed to
-- the language-independent detection in Module:scripts.
function Language:findBestScript(text, forceDetect)
	local function noneScript()
		return require("Module:scripts").getByCode("None")
	end
	
	if (not text) or text == "" or text == "-" then
		return noneScript()
	end
	
	if table.concat(self:getScriptCodes()) == "All" then
		return require("Module:scripts").findBestScriptWithoutLang(text)
	end
	
	local scripts = self:getScripts()
	
	-- Several candidate scripts, or detection explicitly requested: use the full detection algorithm.
	if scripts[2] or forceDetect then
		return require("Module:languages/findBestScript")(export, self, text, scripts, forceDetect)
	end
	
	local only = scripts[1]
	-- The Hani check is necessary, because Hani covers the entire Han range (while the Hant & Hans lists don't list shared characters).
	local isHanText = only:getCode():match("^Han") and require("Module:scripts").getByCode("Hani"):countCharacters(text) > 0
	if isHanText or only:countCharacters(text) > 0 then
		return only
	end
	return noneScript()
end

-- Returns the family object for the family code in the third positional data field (cached once found),
-- or nil when the language has no family code.
function Language:getFamily()
	if not self._familyObject then
		local familyCode = self._rawData[3]
		if familyCode then
			self._familyObject = require("Module:families").getByCode(familyCode)
		end
	end
	
	return self._familyObject
end

-- Returns the list of explicitly specified ancestor codes, or nil if the data has none. The raw field may be a table
-- or a comma-separated string.
function Language:getAncestorCodes()
	if not self._ancestorCodes then
		local raw = self._rawData.ancestors
		local kind = type(raw)
		if kind == "table" then
			self._ancestorCodes = raw
		elseif kind == "string" then
			self._ancestorCodes = mw.text.split(raw, "%s*,%s*")
		else
			self._ancestorCodes = nil
		end
	end
	return self._ancestorCodes
end

-- Returns the list of ancestor language objects (cached on the object). Explicit ancestors from the data are looked up
-- as languages first and as etymology languages second. Without explicit ancestors, the proto-language of the nearest
-- enclosing family that has one is used; the search stops when there is no family left or the family code is "qfa-not".
function Language:getAncestors()
	if self._ancestorObjects then
		return self._ancestorObjects
	end
	
	local ancestorObjects = {}
	self._ancestorObjects = ancestorObjects
	
	if self._rawData.ancestors then
		for _, code in ipairs(self:getAncestorCodes()) do
			table.insert(ancestorObjects, export.getByCode(code) or require("Module:etymology languages").getByCode(code))
		end
		return ancestorObjects
	end
	
	local function protoOf(family)
		return family and family:getProtoLanguage() or nil
	end
	
	local fam = self:getFamily()
	local protoLang = protoOf(fam)
	
	-- For the case where the current language is the proto-language
	-- of its family, we need to step up a level higher right from the start.
	if protoLang and protoLang:getCode() == self:getCode() then
		fam = fam:getFamily()
		protoLang = protoOf(fam)
	end
	
	while not protoLang and fam and fam:getCode() ~= "qfa-not" do
		fam = fam:getFamily()
		protoLang = protoOf(fam)
	end
	
	table.insert(ancestorObjects, protoLang)
	
	return ancestorObjects
end

-- Depth-first walk over the ancestors of node. func is called on each ancestor before that ancestor's own ancestors
-- are visited; the first truthy value produced anywhere in the walk is returned. Returns nothing if there is none.
local function iterateOverAncestorTree(node, func)
	for _, ancestor in ipairs(node:getAncestors()) do
		if ancestor then
			local found = func(ancestor)
			if not found then
				found = iterateOverAncestorTree(ancestor, func)
			end
			if found then
				return found
			end
		end
	end
end

-- Returns the chain of single ancestors, oldest first (cached on the object). The chain is followed upwards for as
-- long as each step has exactly one ancestor.
function Language:getAncestorChain()
	if not self._ancestorChain then
		local chain = {}
		self._ancestorChain = chain
		
		local current = self
		repeat
			local ancestors = current:getAncestors()
			current = #ancestors == 1 and ancestors[1] or nil
			if current then
				table.insert(chain, 1, current)
			end
		until not current
	end
	
	return self._ancestorChain
end

-- Returns true if otherlang (compared by code) appears anywhere in this language's ancestor tree, false otherwise.
function Language:hasAncestor(otherlang)
	local matched = iterateOverAncestorTree(self, function(ancestor)
		return ancestor:getCode() == otherlang:getCode()
	end)
	
	return matched or false
end

-- Returns the name used in category names: the canonical name, with its first letter capitalized by the content
-- language unless nocap is set.
function Language:getCategoryName(nocap)
	local name = self:getCanonicalName()
	if nocap then
		return name
	end
	return (mw.getContentLanguage():ucfirst(name))
end

-- Builds a piped link to the language's category, displayed as the language's display form.
function Language:makeCategoryLink()
	local category = self:getCategoryName()
	local label = self:getDisplayForm()
	return "[[:Category:" .. category .. "|" .. label .. "]]"
end

-- Returns the raw standardChars field of the language data (nil if not set).
function Language:getStandardCharacters()
	local data = self._rawData
	return data.standardChars
end

-- Make the entry name (i.e. the correct page name).
-- Returns three values: the entry name, the failure flag and the category list from the substitution step.
-- Empty or missing text is returned as-is with an empty category list. Pass escape_characters = false to skip the
-- final conversion of risky characters to HTML entities.
function Language:makeEntryName(text, sc, escape_characters)
	if (not text) or text == "" then
		return text, nil, {}
	end
	-- Remove bold, soft hyphens, strip markers and HTML tags.
	text = text
		:gsub("('*)'''(.-'*)'''", "%1%2")
		:gsub("­", "")
	text = mw.text.unstrip(text)
		:gsub("<[^<>]+>", "")
	-- Don't remove italics, as that would allow people to use it instead of {{m}} etc.
	
	-- Keep a copy from before percent-decoding and entity conversion, for the unsupported title check below.
	local textWithEnc = text
	text = mw.uri.decode(text)
	text = noEntities(text)
	
	-- Check if the text is an interwiki link.
	if text:find(":") then
		-- If this is an interwiki link, a link to another namespace or there's an initial colon, return what we have.
		-- check is whatever precedes the first colon, after skipping any leading colons.
		local check, m_utildata = text:match("^:*([^:]*):"), mw.loadData("Module:utilities/data")
		if m_utildata.interwikis[check] or m_utildata.namespaces[check] then
			return text, nil, {}
		else
			check, m_utildata = nil
			-- Convert any escaped colons.
			text = text:gsub("\\:", ":")
			textWithEnc = textWithEnc:gsub("\\:", ":")
		end
	end
	
	-- Check if the text is an unsupported title (with and without converting percent encoding/HTML entities).
	local unsupportedTitles = mw.loadData("Module:links/data").unsupported_titles
	if unsupportedTitles[text] or unsupportedTitles[textWithEnc] then
		return "Unsupported titles/" .. (unsupportedTitles[text] or unsupportedTitles[textWithEnc]), nil, {}
	end
	
	sc = checkScript(text, self, sc)
	
	local fail, cats
	text = normalize(text, sc)
	-- No temporary substitutions are used for entry names, hence the two nil arguments.
	text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._rawData.entry_name, "makeEntryName")
	-- NOTE(review): iterateSectionSubstitutions sets text to nil when a section's substitution returns nothing; the
	-- lines below assume text is a string - confirm that cannot happen for entry names.
	
	text = removeCarets(text, sc)
	-- Drop an initial inverted question/exclamation mark and one final sentence-ending punctuation mark, provided
	-- something other than whitespace/punctuation remains; otherwise keep the text unchanged.
	text = mw.ustring.match(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text
	
	return (escape_characters == false and text or escapeRiskyChars(text)), fail, cats
end

-- Generates alternative forms using the module named in the language's generate_forms field, and returns them as a
-- table. If the language names no such module, returns a table containing only the input term.
function Language:generateForms(text, sc)
	local generator = self._rawData.generate_forms
	if not generator then
		return {text}
	end
	sc = checkScript(text, self, sc)
	return require("Module:" .. generator).generateForms(text, self:getCode(), sc:getCode())
end

-- Make the sort key for a term.
-- Returns three values: the sort key (uppercased, with risky characters converted to HTML entities), the failure flag
-- and the category list from the substitution step. Empty or missing text is returned as-is with an empty category list.
function Language:makeSortKey(text, sc)
	if (not text) or text == "" then
		return text, nil, {}
	end
	-- Remove soft hyphens, strip markers and HTML tags.
	text = text:gsub("­", "")
	text = mw.text.unstrip(text)
		:gsub("<[^<>]+>", "")
	
	text = checkNoEntities(text)
	
	-- Remove initial hyphens and *.
	-- (They are only removed when at least one further character follows.)
	text = mw.ustring.gsub(text, "^[-־ـ᠊*]+(.)", "%1")
	
	sc = checkScript(text, self, sc)
	
	text = normalize(text, sc)
	text = removeCarets(text, sc)
	
	-- For languages with dotted dotless i, ensure that "İ" is sorted as "i", and "I" is sorted as "ı".
	if self._rawData.dotted_dotless_i then
		text = text
			:gsub(mw.ustring.toNFD("İ"), "i")
			:gsub("I", "ı")
		-- Re-normalize, since the replacements above were made on already normalized text.
		text = sc:toFixedNFD(text)
	end
	-- Convert to lowercase, make the sortkey, then convert to uppercase. Where the language has dotted dotless i, it is usually not necessary to convert "i" to "İ" and "ı" to "I" first, because "I" will always be interpreted as conventional "I" (not dotless "İ") by any sorting algorithms, which will have been taken into account by the sortkey substitutions themselves. However, if no sortkey substitutions have been specified, then conversion is necessary so as to prevent "i" and "ı" both being sorted as "I".
	local fail, cats
	text = mw.ustring.lower(text)
	-- No temporary substitutions are used for sort keys, hence the two nil arguments.
	text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._rawData.sort_key, "makeSortKey")
	-- NOTE(review): iterateSectionSubstitutions sets text to nil when a section's substitution returns nothing; the
	-- lines below assume text is a string - confirm that cannot happen for sort keys.
	if self._rawData.dotted_dotless_i and not self._rawData.sort_key then
		text = text
			:gsub("ı", "I")
			:gsub("i", "İ")
		text = sc:toFixedNFC(text)
	end
	text = mw.ustring.upper(text)
	
	-- Remove parentheses, as long as they are either preceded or followed by something.
	text = text
		:gsub("(.)[()]+", "%1")
		:gsub("[()]+(.)", "%1")
	
	return escapeRiskyChars(text), fail, cats
end

-- Create the form used as a basis for display text and transliteration.
-- Parameters:
--   text: the raw text.
--   self: the language object.
--   sc: a script object, or anything else to have the script detected from the text.
--   keepCarets: if truthy, carets are protected by the temporary substitutions.
--   keepPrefixes: if truthy, leading interwiki prefixes are left in place.
-- Returns: text, fail, cats, subbedChars (the temporary substitutions, still to be undone by the caller).
local function processDisplayText(text, self, sc, keepCarets, keepPrefixes)
	local subbedChars = {}
	text, subbedChars = doTempSubstitutions(text, subbedChars, keepCarets)
	
	text = checkNoEntities(text)
	
	sc = checkScript(text, self, sc)
	
	local fail, cats
	text = normalize(text, sc)
	text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, self._rawData.display_text, "makeDisplayText")
	
	text = removeCarets(text, sc)
	
	-- Remove any interwiki link prefixes (unless they have been escaped or this has been disabled).
	if text:find(":") and not keepPrefixes then
		-- Park escaped backslashes and escaped colons behind placeholder characters so they are not seen as prefix separators.
		text = text
			:gsub("\\\\:", "\243\176\128\128:")
			:gsub("\\:", "\243\176\128\129")
		local prefix, oldText, m_utildata = text:match("^([^:]*):"), text, mw.loadData("Module:utilities/data")
		while prefix and (m_utildata.interwikis[prefix] or prefix == "") do
			oldText = text
			-- prefix was captured from the very start of the text and is directly followed by ":", so it can be
			-- stripped by length. Building a pattern from it would break on prefixes containing pattern magic
			-- characters (e.g. the "-" in "zh-min-nan"): the pattern would fail to match, the text would never
			-- change, and this loop would never end.
			text = text:sub(#prefix + 2)
			prefix = text:match("^([^:]*):")
		end
		-- If the whole text has been removed (i.e. the text ends with a colon), then the final prefix is not actually a prefix.
		if text == "" then text = oldText end
		text = text
			:gsub("\243\176\128\128", "\\")
			:gsub("\243\176\128\129", ":")
	end
	
	return text, fail, cats, subbedChars
end

-- Make the display text (i.e. what is displayed on the page).
-- Returns the display text, the failure flag and the category list. Empty or missing text is returned as-is with an
-- empty category list.
function Language:makeDisplayText(text, sc, keepPrefixes)
	if (not text) or text == "" then
		return text, nil, {}
	end
	local displayText, fail, cats, subbedChars = processDisplayText(text, self, sc, nil, keepPrefixes)
	-- Escape first, then restore the protected formatting, so that the formatting itself is not escaped.
	displayText = escapeRiskyChars(displayText)
	return undoTempSubstitutions(displayText, subbedChars), fail, cats
end

-- Transliterate the text, using the language's translit data or the given module override.
-- Returns three values: the transliteration (or nil), the failure flag and the category list. nil, true is returned
-- when there is nothing to transliterate with, when the script is not transliterated, or when the result is incomplete.
function Language:transliterate(text, sc, module_override)
	-- If there is no text, or the language doesn't have transliteration data and there's no override, return nil.
	if not (self._rawData.translit or module_override) then
		return nil, true, {}
	elseif (not text) or text == "" or text == "-" then
		-- Nothing to transliterate: hand the input back unchanged.
		return text, nil, {}
	end
	-- If the script is not transliteratable (and no override is given), return nil.
	sc = checkScript(text, self, sc)
	if not (sc:isTransliterated() or module_override) then
		return nil, true, {}
	end
	
	-- Remove any strip markers.
	text = mw.text.unstrip(text)
	
	-- Get the display text with the keepCarets flag set.
	local fail, cats, subbedChars
	text, fail, cats, subbedChars = processDisplayText(text, self, sc, true)
	
	-- Transliterate (using the module override if applicable).
	-- Note that fail and cats from the display text step are overwritten here.
	text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, true, self, sc, module_override or self._rawData.translit, "tr")
	
	-- Incomplete transliterations return nil.
	-- (A result that still contains characters of the source script counts as incomplete.)
	-- FIXME: Handle transliterations with characters that are in both Latn/Latinx and a transliteratable script (e.g. U+A700-U+A707 are in Latinx and Hani).
	if (not text) or sc:countCharacters(text) > 0 then
		return nil, true, cats
	end
	
	-- Escape first, then restore the protected formatting, so that the formatting itself is not escaped.
	text = escapeRiskyChars(text)
	text = undoTempSubstitutions(text, subbedChars)
	
	-- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret).
	if text and not sc:hasCapitalization() and text:find("%^") then
		-- The capture is one UTF-8 character: a lead byte followed by any continuation bytes.
		text = processCarets(text, "%^([%z\1-\127\194-\244][\128-\191]*)", mw.ustring.upper)
	end
	
	-- Track module overrides.
	if module_override ~= nil then
		require("Module:debug").track("module_override")
	end
	
	return text, fail, cats
end

-- Returns true if the language data sets override_translit to a truthy value, false otherwise.
function Language:overrideManualTranslit()
	if self._rawData.override_translit then
		return true
	end
	return false
end

-- Returns true if the language data has a truthy translit field, false otherwise.
function Language:hasTranslit()
	if self._rawData.translit then
		return true
	end
	return false
end

-- Returns true if the language data sets link_tr to a truthy value, false otherwise.
function Language:link_tr()
	if self._rawData.link_tr then
		return true
	end
	return false
end

-- Provides a way to apply a substitution method via gsub (or a series of gsubs), where the output is dependent on every substitution being successful (e.g. in a term).
-- Parameters:
--   text: the string to process.
--   sc: passed to the substitution method.
--   method: name of the Language method to call on each section (called as self[method](self, section, sc)).
--   patterns: list of patterns, each capturing (pre, section, post); only section is substituted.
-- Returns: the resulting text (or, on failure, the value returned by the failing call), the failure flag, and the
-- categories (on failure, the categories of the failing call).
function Language:gsubSubstitutions(text, sc, method, patterns)
	local get_entities = require("Module:utilities").get_entities
	local categories, fail, fail_message = {}
	
	local function process_section(pre, section, post)
		-- Once a section has failed, the failure must stick: leave every later match untouched (returning nil from a
		-- gsub callback keeps the original match) instead of letting a later successful call reset the flag.
		if fail then
			return nil
		end
		local section_fail, section_categories
		section = get_entities(section)
		section, section_fail, section_categories = self[method](self, section, sc)
		if section_fail then
			fail = section_fail
			fail_message = section
			categories = section_categories
		elseif type(section_categories) == "table" then
			for _, category in ipairs(section_categories) do
				table.insert(categories, category)
			end
		end
		return (pre or "") .. (section or "") .. (post or "")
	end
	
	for _, pattern in ipairs(patterns) do
		text = text:gsub(pattern, process_section)
		if fail then break end
	end
	
	return (fail_message or text), fail, categories
end

-- Collects the language's main properties into a table. Returns that table if returnTable is set, otherwise its JSON
-- serialization produced by Module:JSON.
function Language:toJSON(returnTable)
	local entryNamePatterns, entryNameRemoveDiacritics
	
	local entryNameData = self._rawData.entry_name
	if entryNameData then
		entryNameRemoveDiacritics = entryNameData.remove_diacritics
		local fromList = entryNameData.from
		if fromList then
			-- Pair each "from" pattern with the replacement at the same index (empty string if there is none).
			entryNamePatterns = {}
			for i, from in ipairs(fromList) do
				table.insert(entryNamePatterns, {from = from, to = entryNameData.to[i] or ""})
			end
		end
	end
	
	local ret = {}
	ret.ancestors = self:getAncestorCodes()
	ret.canonicalName = self:getCanonicalName()
	ret.categoryName = self:getCategoryName("nocap")
	ret.code = self:getCode()
	ret.entryNamePatterns = entryNamePatterns
	ret.entryNameRemoveDiacritics = entryNameRemoveDiacritics
	ret.family = self._rawData[3]
	ret.otherNames = self:getOtherNames(true)
	ret.aliases = self:getAliases()
	ret.varieties = self:getVarieties()
	ret.scripts = self:getScriptCodes()
	ret.type = self:getType()
	ret.wikimediaLanguages = self:getWikimediaLanguageCodes()
	ret.wikidataItem = self:getWikidataItem()

	if not returnTable then
		return require("Module:JSON").toJSON(ret)
	end

	return ret
end

-- Do NOT use these methods!
-- All uses should be pre-approved on the talk page!
-- Returns the raw data table the object was created from.
function Language:getRawData()
	local rawData = self._rawData
	return rawData
end

-- Makes sure the extra data is loaded, then returns the raw extra data table.
function Language:getRawExtraData()
	self:loadInExtraData()
	local extraData = self._extraData
	return extraData
end

-- Objects whose metatable is Language look up missing fields (i.e. the methods above) in Language itself.
Language.__index = Language

-- Returns the name (without the "Module:" prefix) of the data module that would hold the given code, chosen by the
-- code's shape: two lowercase letters, three lowercase letters (one module per initial letter), or any other mix of
-- lowercase letters and hyphens. Returns nil for any other code.
function export.getDataModuleName(code)
	if code:find("^%l%l$") then
		return "languages/data/2"
	end
	if code:find("^%l%l%l$") then
		return "languages/data/3/" .. code:sub(1, 1)
	end
	if code:find("^[%l-]+$") then
		return "languages/data/exceptional"
	end
	return nil
end


-- Returns the name of the extra data module for the code (the data module name plus "/extra"), or nil if the code has
-- no data module.
function export.getExtraDataModuleName(code)
	local dataModule = export.getDataModuleName(code)
	if dataModule then
		return dataModule .. "/extra"
	end
	return nil
end

-- Looks the code up in its data module. Returns nil if the code has no data module or no entry in it.
local function getRawLanguageData(code)
	local modulename = export.getDataModuleName(code)
	if not modulename then
		return nil
	end
	return mw.loadData("Module:" .. modulename)[code] or nil
end

-- Looks the code up in its extra data module. Returns nil if the code has no such module or no entry in it.
local function getRawExtraLanguageData(code)
	local modulename = export.getExtraDataModuleName(code)
	if not modulename then
		return nil
	end
	return mw.loadData("Module:" .. modulename)[code] or nil
end

-- Loads the extra data for the language into self._extraData, unless that has already been done. An empty table is
-- stored when the extra data module has nothing for this language.
function Language:loadInExtraData()
	if self._extraData then
		return
	end
	self._extraData = getRawExtraLanguageData(self:getCode()) or {}
end

-- Wraps raw language data in a language object. Returns nil when there is no data. Use of data marked as deprecated
-- is reported to Module:debug for tracking.
function export.makeObject(code, data)
	if not data then
		return nil
	end
	
	if data.deprecated then
		require("Module:debug").track {
			"languages/deprecated",
			"languages/deprecated/" .. code
		}
	end
	
	local object = {_rawData = data, _code = code, _type = "language object"}
	return setmetatable(object, Language)
end

-- Returns the language object for a code. Etymology languages and families are tried as fallbacks when the
-- corresponding allow* flag is set. If nothing is found and paramForError is given, Module:languages/errorGetBy is
-- called to report the problem; otherwise the (falsy) lookup result is returned. Raises an error if code is not a string.
function export.getByCode(code, paramForError, allowEtymLang, allowFamily)
	if type(code) ~= "string" then
		local received = code == nil and "nil" or "a " .. type(code)
		error("The function getByCode expects a string as its first argument, but received " .. received .. ".")
	end
	
	local found = export.makeObject(code, getRawLanguageData(code))
	if found then
		return found
	end
	if allowEtymLang then
		found = require("Module:etymology languages").getByCode(code)
		if found then
			return found
		end
	end
	if allowFamily then
		found = require("Module:families").getByCode(code)
		if found then
			return found
		end
	end
	if paramForError then
		require("Module:languages/errorGetBy").code(code, paramForError, allowEtymLang, allowFamily)
	end
	return found
end

-- Returns the language object for a name listed in Module:languages/by name. For an unknown name, raises an error if
-- errorIfInvalid is set and returns nil otherwise.
function export.getByName(name, errorIfInvalid)
	local byName = mw.loadData("Module:languages/by name")
	local code = byName.all and byName.all[name] or byName[name]
	
	if code then
		return export.makeObject(code, getRawLanguageData(code))
	end
	
	if errorIfInvalid then
		error("The language name \"" .. name .. "\" is not valid. See [[Wiktionary:List of languages]].")
	end
	return nil
end

-- Returns the language object for a canonical name. Etymology languages and families are tried as fallbacks when the
-- corresponding allow* flag is set; for families, a trailing " languages" is dropped from the name first. If nothing
-- is found and errorIfInvalid is set, Module:languages/errorGetBy is called to report the problem.
function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily)
	local byName = mw.loadData("Module:languages/canonical names")
	local code = byName and byName[name]

	local found = code and export.makeObject(code, getRawLanguageData(code)) or nil
	if found then
		return found
	end
	if allowEtymLang then
		found = require("Module:etymology languages").getByCanonicalName(name)
		if found then
			return found
		end
	end
	if allowFamily then
		local famname = name:match("^(.*) languages$") or name
		found = require("Module:families").getByCanonicalName(famname)
		if found then
			return found
		end
	end
	if errorIfInvalid then
		require("Module:languages/errorGetBy").canonicalName(name, allowEtymLang, allowFamily)
	end
	return found
end

--[[	If lang is an etymology language, follows parent codes upwards until it reaches something whose type is not
		"etymology language", and returns that. Each parent is looked up as a language first, then as an
		etymology language, then as a family. ]]
function export.getNonEtymological(lang)
	local current = lang
	while current:getType() == "etymology language" do
		local parentCode = current:getParentCode()
		local parent = export.getByCode(parentCode)
		if not parent then
			parent = require("Module:etymology languages").getByCode(parentCode)
		end
		if not parent then
			parent = require("Module:families").getByCode(parentCode)
		end
		current = parent
	end
	
	return current
end

-- Kept for backwards compatibility only: forwards all arguments to Module:languages/error. Modules should require
-- that module themselves instead of calling this.
function export.err(lang_code, param, code_desc, template_tag, not_real_lang)
	local raiseLanguageError = require("Module:languages/error")
	return raiseLanguageError(lang_code, param, code_desc, template_tag, not_real_lang)
end

-- Hand the table of public functions to whoever requires this module.
return export