Module:Redirect

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
Lua
CodeDiscussionEditHistoryLinksLink count Subpages:DocumentationTestsResultsSandboxLive code All modules

This module implements templates that detect and resolve redirects and soft category redirects.

Soft redirects are hard to detect and costly as they require loading and parsing the wiki page content of 'Category:' pages.

Soft redirects are category pages containing the template {{Category redirect|target}}

or one of its known aliases on Commons (there are many of them).

For the full list of aliases of {{Category redirect}}, see

https://commons.wikimedia.org/w/index.php?title=Special:WhatLinksHere/Template:Category_redirect&hidetrans=1&hidelinks=1

For the full list of derived templates (specifying an embedded reason parameter passed to the main template), see

https://commons.wikimedia.org/w/index.php?title=Special:WhatLinksHere/Template:Category_redirect&namespace=10&hidelinks=1&hideredirs=1

The 'target' parameter value for these templates may be prefixed by 'Category:' or ':Category:' or not (it is implied).

Code

-- This module provides functions for getting the target of a redirect page.

-- Table of exported functions; it is returned at the end of the module.
local p = {}

-- Local alias of mw.ustring.char, used to turn a numeric character reference
-- into the corresponding character.
local char = mw.ustring.char

-- Lazy proxy for Module:Arguments' getArgs: the dependency is only loaded on
-- the first call, after which this local is replaced by the real function.
-- It is declared with `local function` so that the name used inside the body
-- is this local; with `local getArgs = function ...` the inner name would be
-- a global, the proxy would never be replaced and a global would be leaked.
local function getArgs(...)
	getArgs = require('Module:Arguments').getArgs
	return getArgs(...)
end

-- Builds a mw.title object from the given arguments. The constructor is run
-- under pcall so that a failure (for instance when the expensive function
-- count limit has been exceeded) yields nil instead of a script error.
local function getTitle(...)
	local ok, title = pcall(mw.title.new, ...)
	if not ok then
		return nil
	end
	return title
end

-- Detects a hard redirect in wiki page content (not used inside this module).
--   text: raw wikitext of a page, or nil (treated as empty text).
-- Returns the link target that follows the '#REDIRECT' keyword (matched in any
-- letter case, optionally followed by whitespace or colons), with trailing
-- whitespace removed, or nil when no such redirect is found.
-- The keyword is spelled out in full: a wildcard in its middle would also
-- accept arbitrary text such as '#REDxyzCT'.
function p.getTargetFromText(text)
	text = text or ''
	return text:match('#[Rr][Ee][Dd][Ii][Rr][Ee][Cc][Tt][%s:]*%[%[([^%]|]-)%s*%]%]')
end

--[[
Detect soft redirect in wiki page content of 'Category:' pages, using template
	{{Category redirect|target}}
or one of its known aliases on Commons:
	{{Category Redirect|target}}
	{{Categoryredirect|target}}
	{{Cat redirect|target}}
	{{Catredirect|target}}
	{{Catredir|target}}
	{{Cat-redirect|target}}
	{{Cat-red|target}}
	{{Redirect category|target}}
	{{Redirect cat|target}}
	{{Seecat|target}}
	{{See cat|target}}
	{{Endashcatredirect|target}}
	{{Synonym taxon category redirect|target}}
	{{Invalid taxon category redirect|target}}
	{{Monotypic taxon category redirect|target}}
The `target` value may be prefixed by 'Category:' or not (implied). For the full list of aliases, see
	https://commons.wikimedia.org/w/index.php?title=Special:WhatLinksHere/Template:Category_redirect&hidetrans=1&hidelinks=1

Parameter:
	content: wikitext of the category page, or nil (treated as empty).
Returns:
	the target as 'Category:<name>', or nil when no soft redirect template is
	found, when it has no usable first parameter, or when the target is empty
	or contains characters forbidden in page names.
]]

-- Patterns capturing the parameter list of each supported template name. They
-- are tried in this order, the first one that matches wins.
local catRedirectPatterns = {
	'{{[%s_]*[Cc]ategory[%s_]*[Rr]edirect[%s_]*|(.-)}}',
	'{{[%s_]*[Cc]at[%s_]*[Rr]edirect[%s_]*|(.-)}}',
	'{{[%s_]*[Cc]at-[Rr]edirect[%s_]*|(.-)}}',
	'{{[%s_]*[Cc]at[%s_]*[Rr]edir[%s_]*|(.-)}}',
	'{{[%s_]*[Cc]at-red[%s_]*|(.-)}}',
	'{{[%s_]*[Rr]edirect[%s_]*[Cc]ategory[%s_]*|(.-)}}',
	'{{[%s_]*[Rr]edirect[%s_]*[Cc]at[%s_]*|(.-)}}',
	'{{[%s_]*[Ss]ee[%s_]*[Cc]at[%s_]*|(.-)}}',
	'{{[%s_]*[Ee]ndashcatredirect[%s_]*|(.-)}}',
	'{{[%s_]*[Ss]ynonym[%s_]*taxon[%s_]*category[%s_]*redirect[%s_]*|(.-)}}',
	'{{[%s_]*[Ii]nvalid[%s_]*taxon[%s_]*category[%s_]*redirect[%s_]*|(.-)}}',
	'{{[%s_]*[Mm]onotypic[%s_]*taxon[%s_]*category[%s_]*redirect[%s_]*|(.-)}}',
}

-- Named character entities decoded in the target parameter.
local catRedirectNamedEntities = {
	amp = '&',
	gt = '>',
	lt = '<',
	quot = '"',
}

-- Tells whether a numeric character reference may be decoded to this code point.
local function isAcceptedCodepoint(codepoint)
	return codepoint >= 0x0009 and codepoint <= 0x000D -- accept some whitespace C0 controls
		or codepoint >= 0x0020 and codepoint <= 0x10FFFD -- normal range
			and (codepoint < 0x0080 or codepoint > 0x009F) -- exclude C1 controls
			and (codepoint < 0xD800 or codepoint > 0xDFFF) -- exclude all surrogates (high and low)
			and (codepoint < 0xFDD0 or codepoint > 0xFDEF) -- exclude non-characters in BMP
			and codepoint % 0x10000 <= 0xFFFD -- exclude non-characters at end of planes
end

-- gsub callback decoding the name found between '&' and ';'. Returning nil
-- makes gsub keep the original text, so unknown or invalid entities are left
-- untouched instead of losing their '&' and ';' delimiters.
local function decodeCatRedirectEntity(entity)
	local named = catRedirectNamedEntities[entity]
	if named then
		return named
	end
	-- Only pure digit sequences are accepted, so that forms tonumber() would
	-- otherwise understand (such as '1e3' or '0x41') are not decoded.
	local digits, base = entity:match('^#[xX](%x+)$'), 16
	if not digits then
		digits, base = entity:match('^#(%d+)$'), 10
	end
	local codepoint = digits and tonumber(digits, base)
	if codepoint and isAcceptedCodepoint(codepoint) then
		return char(codepoint)
	end
	return nil
end

function p.getTargetFromCatRedirect(content)
	-- Basic filtering: only in the relevant content shown on target page itself
	content = (content or '')
		:gsub('<!%-%-(.-)%-%->', '') -- discard HTML comments
		:gsub('<includeonly%s*>(.-)</includeonly>', '') -- discard 'includeonly' sections and their content
		:gsub('<includeonly%s*>(.-)$', '') -- discard unclosed 'includeonly' sections
		:gsub('<[/]?onlyinclude>', '') -- discard 'onlyinclude' opening/closing tags
		:gsub('<[/]?noinclude>', '') -- discard 'noinclude' opening/closing tags
		:gsub('</?nowiki%s*/?>', '') -- discard nowiki opening/closing/selfclosed tags

	-- Locate the template transclusion, keep its parameters only
	-- Note: there may potentially be several instances, this should not occur
	local rawParams
	for _, pattern in ipairs(catRedirectPatterns) do
		rawParams = content:match(pattern)
		if rawParams then
			break
		end
	end
	if not rawParams then
		return nil
	end

	-- Parse template parameters (they may be in arbitrary order); unnamed
	-- parameters are numbered in order of appearance, named ones are trimmed.
	local params, n = {}, 0
	for param in rawParams:gmatch('([^|]+)') do
		local key
		local pos = param:find('=')
		if pos then
			key = param:sub(1, pos - 1):match('^%s*(.-)%s*$')
			param = param:sub(pos + 1):match('^%s*(.-)%s*$')
		else
			n = n + 1
			key = tostring(n)
		end
		params[key] = param
	end

	-- The target is in parameter ['1'] of the template transclusion
	local target = params['1']
	if not target then
		return nil
	end

	target = target
		-- The target parameter may contain some known character entities
		-- (their validity in page names is not checked here, just parsed)
		:gsub('&([#%d%a]+);', decodeCatRedirectEntity)
		-- Normalize spaces in the target according to Mediawiki pagename rules
		:gsub('[%s_]+', ' ')
		-- The target parameter should be trimmed by the template
		:match('^ ?(.-) ?$')

	-- The target should not be empty and not contain tag delimiters or other character forbidden in full page names
	-- Note: templates used inside the target parameter of the soft redirect are not expanded (there should be none)
	-- as this is costly or memory intensive and slow (would require invoking a full mediawiki parser).
	if target == '' or target:find('[<>%[%]{|}]') then
		return nil
	end

	-- The 'Category:' namespace is implied: strip it when given explicitly
	local name = target:match('^:*[Cc][Aa][Tt][Ee][Gg][Oo][Rr][Yy] *: *(.-)$') or target
	if name == '' then
		-- Only the namespace prefix was given, there is no actual target
		return nil
	end
	return 'Category:' .. name
end

-- Resolves where a title object leads to.
--   obj: a title object (it must provide isRedirect, redirectTarget,
--        prefixedText, inNamespace() and getContent()).
-- Returns, in this order of preference:
--   * obj.redirectTarget when the page is a hard redirect;
--   * for a 'Category:' page holding a soft redirect template: the redirect
--     target of the soft target when that one is itself a hard redirect,
--     otherwise the prefixed name of the soft target (or the parsed target
--     string when no title object could be created for it);
--   * the prefixed name of the page itself when no redirect was detected.
-- Raises an error when a page is flagged as a redirect but gives no target.
function p.getTargetFrom(obj)
	--[[
	obj:getContent() is intensive for large pages, but obj.redirectTarget returns directly
	the target without needing to load and parse the text content of the wiki page.
	This works for wiki pages that are hard redirects (content starting by '#REDIRECT').
	In this case, using p.getTargetFromText(obj:getContent()) is not needed.
	]]
	local r = obj.isRedirect and obj.redirectTarget
	if r then
		return r
	end
	--[[
	For pages using soft redirects (like categories), we must detect the template
	used in the page content itself (this may be costly in memory and slow if not
	loaded in the cache of page contents).
	]]
	if obj:inNamespace('Category') then
		r = p.getTargetFromCatRedirect(obj:getContent())
		if r then
			local obj2 = getTitle(r)
			if obj2 then -- getTitle may fail
				obj = obj2 -- true replacement with the target
				-- special: the replacement may also be a hard redirect
				r = obj.isRedirect and obj.redirectTarget
				-- not done here: second soft redirect detection
			end
		end
	end
	if r then
		-- Keep what was resolved above: overwriting it with the name of the
		-- page would drop the hard redirect followed on the soft target.
		return r
	end
	if obj.isRedirect then
		-- The page is a redirect, but no target could be read from it.
		error(string.format(
			'could not parse redirect on page "%s"',
			obj.prefixedText
		))
	end
	return obj.prefixedText
end

-- Formats a page as text.
--   target: a page name (string or number) or a title object (recognised by
--           its getContent method); anything else raises an error reported
--           at the caller's position.
--   fulltext: when set, titleObj.fullText is preferred over prefixedText.
--   ensureTitleExists: when set, nothing is returned for pages that do not exist.
-- Returns the formatted name, or nothing when no title object is available or
-- when the existence check fails.
local function fmtTitle(target, fulltext, ensureTitleExists)
	local kind = type(target)
	local titleObj
	if kind == 'table' and type(target.getContent) == 'function' then
		titleObj = target
	elseif kind == 'string' or kind == 'number' then
		titleObj = getTitle(target)
	else
		local message = "bad argument #1 to 'fmtTitle' " ..
			"(string, number, or title object expected, got %s)"
		error(string.format(message, kind), 2)
	end
	if not titleObj then
		return
	end
	if ensureTitleExists and not titleObj.exists then
		return
	end
	local full = fulltext and titleObj.fullText
	return full or titleObj.prefixedText
end

-- Gets the formatted target of a page.
--   page: a page name (string or number) or a title object (recognised by its
--         getContent method); anything else raises an error reported at the
--         caller's position.
--   rname: not used by this function.
--   fulltext, ensureTitleExists: passed on to fmtTitle.
-- Returns nothing unless the page exists and is either flagged as a redirect
-- or located in the 'Category' namespace; otherwise returns the result of
-- formatting what p.getTargetFrom resolves for that page.
function p.getTarget(page, rname, fulltext, ensureTitleExists)
	-- Both page names and title objects are allowed as input.
	local pageType = type(page)
	local titleObj
	if pageType == 'table' and type(page.getContent) == 'function' then
		titleObj = page
	elseif pageType == 'string' or pageType == 'number' then
		titleObj = getTitle(page)
	else
		local message = "bad argument #1 to 'getTarget' " ..
			"(string, number, or title object expected, got %s)"
		error(string.format(message, pageType), 2)
	end
	if not (titleObj and titleObj.exists) then
		return
	end
	if not titleObj.isRedirect and not titleObj:inNamespace("Category") then
		return
	end
	return fmtTitle(p.getTargetFrom(titleObj), fulltext, ensureTitleExists)
end

-- Given a single page name, determines what page it redirects to and returns
-- the target page name, or the passed page name when it is not a redirect.
--   rname: the page name, as plain text or as a page link (optionally piped);
--          nil, non-string and blank values make the function return nil.
--   bracket: when set, the result is returned as a page link; the original
--            name is used as link label when it differs from the target.
--   fulltext, ensureTitleExists: formatting options passed on to p.getTarget
--            and fmtTitle.
-- Returns nil when no page name could be produced (for instance when the page
-- does not exist and ensureTitleExists is set).
function p.luaMain(rname, bracket, fulltext, ensureTitleExists)
	if type(rname) ~= "string" or not rname:find("%S") then
		return nil
	end
	-- Unwrap a page link: keep only the link target, without surrounding
	-- whitespace and without the label of a piped link.
	rname = rname:match("%[%[%s*([^%]|]-)%s*%]%]")
		or rname:match("%[%[%s*([^%]|]-)%s*|.-%]%]")
		or rname
	-- The second parameter of p.getTarget is unused there; nil is passed in
	-- that slot so that both options land in the parameters bearing their name.
	local ret = p.getTarget(rname, nil, fulltext, ensureTitleExists) or
		fmtTitle(rname, fulltext, ensureTitleExists)
	if not ret then
		return nil
	end
	local link = bracket and (ret == rname and "[[:%s]]" or "[[:%s|%s]]") or "%s"
	return link:format(ret, rname)
end

-- Values of a wikitext parameter that switch an option on.
local enabledFlagValues = {
	['1'] = true,
	['t'] = true,
	['true'] = true,
	['y'] = true,
	['yes'] = true,
}

-- Interprets an option value: returns true for one of the accepted values
-- ('1', 't', 'true', 'y', 'yes') and nil for anything else.
local function use(x)
	if x and enabledFlagValues[x] then
		return true
	end
	return nil
end

-- Wikitext entry point for p.luaMain; the result may be a red link, i.e. the
-- title of a page that does not exist in the wiki.
-- Frame arguments:
--   1: the page name
--   2: text returned when p.luaMain gives no result (defaults to '')
--   bracket, fulltext, noredlinks: optional flags, passed on to p.luaMain
--   (noredlinks is passed as its ensureTitleExists parameter)
function p.main(frame)
	local args = getArgs(frame, {frameOnly = true})
	local fallback = args[2] or ''
	local result = p.luaMain(
		args[1],
		use(args.bracket),
		use(args.fulltext),
		use(args.noredlinks)
	)
	return result or fallback
end

-- Variant of p.main in which the existence check is always enabled (as with
-- '|noredlinks=true'): when p.luaMain gives no result, frame argument 2 is
-- returned, or the empty string when it is not given.
-- Frame arguments: 1 (page name), 2 (fallback text), bracket and fulltext
-- (optional flags).
function p.mainE(frame)
	local args = getArgs(frame, {frameOnly = true})
	local fallback = args[2] or ''
	local result = p.luaMain(
		args[1],
		use(args.bracket),
		use(args.fulltext),
		true
	)
	return result or fallback
end

-- Variant of p.main returning frame argument 2 (or the empty string) when
-- p.luaMain gives no result. The fulltext option and the existence check are
-- always enabled; the bracket flag is read from the frame arguments.
-- NOTE(review): bracket is only enabled when passed explicitly - confirm
-- whether it was meant to be enabled by default here.
function p.mainA(frame)
	local args = getArgs(frame, {frameOnly = true})
	local fallback = args[2] or ''
	local result = p.luaMain(args[1], use(args.bracket), true, true)
	return result or fallback
end

-- Tells whether the given page is a redirect.
-- Returns true when a title object could be obtained for `page` and its
-- isRedirect property is set, and false in every other case.
function p.luaIsRedirect(page)
	local titleObj = getTitle(page)
	return (titleObj and titleObj.isRedirect) and true or false
end

-- Wikitext entry point for p.luaIsRedirect: returns 'yes' when the page named
-- by frame argument 1 is a redirect, and the blank string otherwise.
function p.isRedirect(frame)
	local args = getArgs(frame, {frameOnly = true})
	return p.luaIsRedirect(args[1]) and 'yes' or ''
end

return p