Модуль:User:Erutuon/ja

Wiktionary дан

Documentation for this module may be created at Модуль:User:Erutuon/ja/doc

-- Table of functions exported by this module (returned at the end of the file).
local export = {}

-- Short aliases for the Unicode-aware string functions used throughout.
local find = mw.ustring.find
local replace = mw.ustring.gsub
local gmatch = mw.ustring.gmatch

-- Character data for the "Kana" and "Hira" script codes, as supplied by
-- Module:scripts. The combined value is interpolated between "[" and "]" in
-- getNonKana, so it is presumably a character-class body -- TODO confirm
-- against Module:scripts.
local kataPattern = require("Module:scripts").getByCode("Kana"):getCharacters()
local hiraPattern = require("Module:scripts").getByCode("Hira"):getCharacters()
local kanaPattern = kataPattern .. hiraPattern

-- Namespace text of the page being rendered; logOrThrowError uses it to decide
-- between raising an error and only logging.
local namespace = mw.title.getCurrentTitle().nsText

-- Read-only data table indexed by a kana string in formsLongVowel; the looked-up
-- value is treated there as a vowel letter (presumably "a", "i", "u", "e" or
-- "o" -- verify against the data module).
local hiraToVowel = mw.loadData("Module:User:Erutuon/ja/data")

-- Normalizes the empty string to nil; any other value is passed through
-- unchanged.
local function ifNotEmpty(var)
	if var ~= "" then
		return var
	end
	return nil
end

-- Reports a problem: raises a Lua error when the current page is in the
-- "Module" or "User" namespace, and otherwise only writes the message to the
-- Scribunto debug log.
local function logOrThrowError(message)
	local shouldThrow = namespace == "Module" or namespace == "User"
	if not shouldThrow then
		mw.log(message)
		return
	end
	error(message)
end

-- Returns what is left of the given string after stripping every non-word
-- character (pattern class %W) and every run of characters listed in
-- kanaPattern. An empty result means the string had no other word characters.
local function getNonKana(kana)
	local wordCharsOnly = replace(kana, "%W", "")
	local kanaRun = "[" .. kanaPattern .. "]+"
	
	-- Parentheses drop gsub's second return value (the replacement count).
	return (replace(wordCharsOnly, kanaRun, ""))
end

-- True when getNonKana finds nothing left over in the string, i.e. all of its
-- word characters are covered by kanaPattern.
local function isKana(kana)
	local leftover = getNonKana(kana)
	return leftover == ""
end
	
-- Stores item at list[index]. If that slot already holds a value, item is
-- concatenated onto the end of it instead of replacing it.
local function addAtIndex(list, index, item)
	local existing = list[index]
	-- A successful concatenation is always a (truthy) string, so the and/or
	-- form cannot fall through to the wrong branch.
	list[index] = existing and (existing .. item) or item
end

-- For each vowel letter, the hiragana that lengthen it when they directly
-- follow a unit ending in that vowel. Built once instead of on every call.
local isLengthenerFor = {
	["a"] = { ["あ"] = true, },
	["e"] = { ["い"] = true, ["え"] = true, },
	["o"] = { ["う"] = true, ["お"] = true, },
	["i"] = { ["い"] = true, },
	["u"] = { ["う"] = true, },
}

-- Decides whether kana2 lengthens the vowel of kana1.
--
-- kana1 is looked up in hiraToVowel to obtain its vowel; kana2 is then checked
-- against the lengtheners listed for that vowel.
--
-- Returns nil when either argument is missing, true when the pair forms a long
-- vowel, and false otherwise (including when kana1 has no entry in hiraToVowel
-- or its vowel has no lengtheners listed).
local function formsLongVowel(kana1, kana2)
	if not (kana1 and kana2) then
		return nil
	end
	
	local vowel = hiraToVowel[kana1]
	-- Guard both lookups: a vowel value outside the table above would
	-- otherwise cause an "attempt to index a nil value" error.
	local lengtheners = vowel and isLengthenerFor[vowel]
	
	if lengtheners and lengtheners[kana2] then
		return true
	end
	
	return false
end

-- Wraps a string in a <span class="Hira"> element. Non-string input yields no
-- return value.
local function tag(hira)
	if type(hira) ~= "string" then
		return
	end
	
	return '<span class="Hira">' .. hira .. '</span>'
end

-- Builds the wikitext cells of one table row from a sequence of strings.
--
-- list:   sequence of cell contents; each one is wrapped with tag().
-- length: optional number; when given, list is padded IN PLACE with empty
--         strings until it has at least that many items, so that all rows of
--         a table have the same number of cells.
--
-- Returns the cells as "| ..." lines joined by newlines, or nil (after
-- logging) when list is not a table.
local function makeRow(list, length)
	if type(list) ~= "table" then
		mw.log("first argument to makeRow isn't table")
		return nil
	end
	
	if type(length) == "number" then
		for _ = #list, length - 1 do
			table.insert(list, "")
		end
	end
	
	local row = {}
	-- ipairs, not pairs: cells must come out in column order, and pairs makes
	-- no promise about traversal order.
	for _, item in ipairs(list) do
		table.insert(row, "| " .. tag(item))
	end
	
	return table.concat(row, "\n")
end

-- Splits a kana string into units, one per "full-size" kana, with any
-- following っ, ん, ゃ, ゅ or ょ attached to the unit before it.
--
-- Katakana in the range ァ-ヶ is first converted to hiragana through
-- Module:ja's kata_to_hira.
--
-- Returns a sequence of unit strings. If the input contains non-kana word
-- characters, or starts with one of the attaching characters, the problem is
-- reported through logOrThrowError and nil is returned (when no error was
-- thrown).
function export.getUnits(kana)
	if not isKana(kana) then
		logOrThrowError("Argument 1 to getUnits, " .. kana .. ", contains the non-kana word characters " .. getNonKana(kana) .. ".")
		return nil
	end
	
	if find(kana, "[ァ-ヶ]") then
		kana = require("Module:ja").kata_to_hira(kana)
	end
	
	-- Characters in either of these sets never start a unit of their own.
	local isSyllabic = {
		["っ"] = true,
		["ん"] = true,
	}
	
	local isMoraic = {
		["ゃ"] = true,
		["ゅ"] = true,
		["ょ"] = true,
	}
	
	local units = {}
	local index = 0
	for char in gmatch(kana, ".") do
		local addToLast = isMoraic[char] or isSyllabic[char]
		if addToLast and index == 0 then
			-- There is no previous unit to attach this character to.
			require("Module:debug").track("ruby/kana beginning with syllabic or moraic char")
			logOrThrowError("The symbol " .. char .. " cannot occur at the beginning of a string of kana: " .. kana .. ".")
			return nil
		end
		
		if not addToLast then
			index = index + 1
		end
		
		addAtIndex(units, index, char)
	end
	
	return units
end

-- Splits a kanji string into characters and its kana reading into units, and
-- tries to bring the number of kana units down to the number of kanji by
-- merging long vowels.
--
-- kanji: string; split into single characters with Module:string's
--        matchToArray.
-- kana:  string; spaces, periods and hyphens are removed before it is split
--        with export.getUnits.
--
-- While there are more kana units than kanji, each unit is merged with the
-- following one when formsLongVowel says the pair forms a long vowel; merging
-- stops as soon as the counts match. The counts may still differ afterwards.
--
-- Returns two sequences (kanji characters, kana units), or nil when the
-- arguments are not strings, the kana contains non-kana word characters, or
-- getUnits fails.
function export.divideKana(kanji, kana)
	if not (type(kanji) == "string" and type(kana) == "string") then
		mw.log("divideKana received improper arguments.")
		return nil
	end
	
	if not isKana(kana) then
		logOrThrowError("Argument 2 to divideKana, " .. kana .. ", contains the non-kana word characters " .. getNonKana(kana) .. ".")
		return nil
	end
	
	-- The trailing "-" inside the set is a literal hyphen.
	kana = replace(kana, "[ %.-]", "")
	
	local kanjiTable = require("Module:string").matchToArray(kanji, ".")
	local kanjiCount = #kanjiTable
	local kanaUnits = export.getUnits(kana)
	
	if not kanaUnits then
		return nil
	end
	
	if #kanaUnits > kanjiCount then
		local i = 1
		while i <= #kanaUnits do
			if formsLongVowel(kanaUnits[i], kanaUnits[i + 1]) then
				-- Fold the lengthening kana into the current unit.
				addAtIndex(kanaUnits, i, kanaUnits[i + 1])
				table.remove(kanaUnits, i + 1)
			end
			
			i = i + 1
			
			if #kanaUnits == kanjiCount then
				break
			end
		end
	end
	
	return kanjiTable, kanaUnits
end

-- Template entry point: for every positional argument, prints a bullet with
-- the kana units returned by export.getUnits (joined by "、" and wrapped by
-- tag()), followed by a line break and the romaji that Module:ja's
-- kana_to_romaji gives for the original argument.
--
-- Arguments for which getUnits returns nil are skipped.
--
-- Returns the bullets joined by newlines.
function export.printUnits(frame)
	local params = {
		[1] = { list = true },
	}
	
	local args = require("Module:parameters").process(frame.args, params)
	
	local output = {}
	-- ipairs keeps the argument order and ignores any non-sequence keys on the
	-- list. Units and romaji are produced in the same iteration so that a
	-- failed getUnits call cannot pair a later word's units with the wrong
	-- argument.
	for _, kana in ipairs(args[1]) do
		local units = export.getUnits(kana)
		if units then
			local printout = table.concat(units, "、")
			table.insert(output, "* " .. tag(printout) .. "<br>" .. require("Module:ja").kana_to_romaji(kana))
		end
	end
	
	return table.concat(output, "\n")
end

-- Template entry point: renders a wikitable showing how export.divideKana
-- splits each "kanji:kana" positional argument.
--
-- Every accepted argument contributes two rows, one with the kanji characters
-- and one with the kana units; all rows are padded to the width of the widest
-- one. An argument that is not in the form "kanji:kana" is reported through
-- logOrThrowError; such arguments, and those for which divideKana returns
-- nothing, are left out of the table.
--
-- Returns the wikitext of the table.
function export.showKanaDivision(frame)
	local params = {
		[1] = { list = true },
	}
	
	local args = require("Module:parameters").process(frame.args, params)
	
	local tableWidth = 0
	local kanjiWords, kanaWords = {}, {}
	-- ipairs: row order must follow argument order.
	for _, arg in ipairs(args[1]) do
		local kanjiText, kanaText = arg:match("^([^:]+):(.+)$")
		if not kanjiText then
			logOrThrowError("Argument to showKanaDivision, " .. arg .. ", is not in the form kanji:kana.")
		else
			local kanjiWord, kanaWord = export.divideKana(kanjiText, kanaText)
			-- divideKana returns nil on invalid input; taking the length
			-- of nil would raise an unhelpful error.
			if kanjiWord and kanaWord then
				tableWidth = math.max(tableWidth, #kanjiWord, #kanaWord)
				table.insert(kanjiWords, kanjiWord)
				table.insert(kanaWords, kanaWord)
			end
		end
	end
	
	local rows = {}
	for i, kanaWord in ipairs(kanaWords) do
		table.insert(rows, makeRow(kanjiWords[i], tableWidth))
		table.insert(rows, makeRow(kanaWord, tableWidth))
	end
	
	return '{| class="wikitable"\n' .. table.concat(rows, "\n|-\n") .. "\n|}"
end

-- Originally from [[Module:User:Suzukaze-c/02]]
--
-- Template entry point: builds a {{ja-r}} (or {{ja-l}}) call for a Japanese
-- term by scraping kana readings from the headword templates on the term's
-- own page.
--
-- Positional arguments are read in one of three layouts:
--   * |<text containing a digit>|term|link title|gloss  (first argument
--                                                        selects a reading)
--   * |ja|term|link title|gloss
--   * |term|link title|gloss
-- A non-empty |gloss= named argument takes precedence over the positional
-- gloss.
--
-- Returns wikitext:
--   * "{{ja-l|term}}" when the page title is invalid or the page has no
--     content;
--   * a "{{ja-r|term|ーーーーー}}" placeholder followed by a dump of the
--     readings when several readings are found and none was selected;
--   * otherwise "{{ja-r|...}}" when a reading is used, or "{{ja-l|...}}" when
--     none is, followed by ": gloss" if a gloss was given.
-- Raises an error if the term contains an embedded wikilink after its first
-- character.
function export.r(frame)
	local one = frame.args[1] or ''
	local two = frame.args[2] or ''
	local three = frame.args[3] or ''
	local four = frame.args[4] or ''

	local jp = ''
	local tr = ''
	-- Declared local here; it was previously assigned as a global.
	local linktitle = ''
	local gloss = frame.args['gloss'] or ''
	local choice = ''

	if find(one, '[0-9]') then
		choice = one
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	elseif one == 'ja' then
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	else
		jp = one
		linktitle = two
		gloss = (gloss ~= '' and gloss or three)
	end
	
	if mw.ustring.match(jp, ".%[%[[^%]]+%]%]") then
		error("Cannot process Japanese text with embedded wikilinks.")
	end

	-- mw.title.new returns nil for an invalid title (e.g. an empty string),
	-- so it must not be indexed unconditionally.
	local title = mw.title.new(jp)
	local content = title and title:getContent()
	if not content then
		return "{{ja-l|" .. jp .. "}}"
	end
	local readings = {}

	-- Extracts the kana parameters from one template's parameter string and
	-- appends them to readings.
	local function process(text)
		-- Drop named parameters that are not readings.
		text = replace(text, 'hhira=[^|}]+', '')
		text = replace(text, 'decl=[^|}]+', '')
		text = replace(text, 'infl=[^|}]+', '')
		text = replace(text, 'kyu=[^|}]+', '')
		text = replace(text, 'head=[^|}]+', '')
		text = replace(text, 'hira=', '')
		if find(text, 'proper') then
			-- Prefix the text and whatever follows each space or pipe with "^".
			text = '^' .. replace(text, '([ |])', '%1^')
		end
		if find(content, 'infl=い') then
			text = replace(text, 'しい', 'し.い')
		end
		if find(content, 'ja%-verb') then
			text = replace(text, 'おう', 'お.う')
		end
		for parameter in text:gmatch('[^|]+') do
			if find(parameter, '[あ-ー]') then
				table.insert(readings, parameter)
			end
		end
	end

	-- Headword templates to scan, in the order their readings are collected.
	-- Entries are pattern fragments, hence the escaped hyphen.
	local templates = { 'adj', 'noun', 'verb', 'verb%-suru', 'phrase', 'pos', 'altread' }
	for _, name in ipairs(templates) do
		for parameters in gmatch(content, '{{ja%-' .. name .. '|([^}]+)}}') do
			process(parameters)
		end
	end

	readings = require("Module:table").removeDuplicates(readings)

	if #readings > 1 then
		if choice ~= '' then
			-- A non-numeric or out-of-range choice selects nothing; fall back
			-- to "no reading" instead of concatenating nil further down.
			tr = readings[tonumber(choice)] or ''
		else
			return '{{ja-r|' .. jp .. '|ーーーーー}}\n' .. require("Module:debug").highlight_dump(readings)
		end
	else
		tr = readings[1] or ''
	end

	-- if term is pure kana and kana is identical
	if replace(jp, '[あ-ー]', '') == '' and tr == jp then
		tr = ''
	end

	if gloss ~= '' then
		gloss = ': ' .. gloss
	end

	if tr ~= '' then
		tr = '|' .. tr
	end

	if linktitle ~= '' then
		jp = 'linkto=' .. jp .. '|' .. linktitle
	end
	
	if tr ~= '' then
		return '{{ja-r|' .. jp .. tr .. '}}' .. gloss
	else
		return '{{ja-l|' .. jp .. '}}' .. gloss
	end

	--[[

	Re-converting a word that has already been converted:
	* select it and press the space key
	* select it and press Win+C

	]]
end


return export