Módulo:Text

Fonte: Enciclopédia de conhecimento da Igreja de Deus
Saltar para a navegação Saltar para a pesquisa

A documentação para este módulo pode ser criada na página Módulo:Text/doc

--[=[
Text utilities
]=]
local Text = {
    serial = "2019-11-12",    -- version stamp reported by failsafe()
    suite  = "Text",
    item   = 29387871         -- number formatted as "Q%d" for item lookups
}
-- Both names are aliases of the very same table as Text
local Failsafe  = Text
local GlobalMod = Text
-- Module-wide caches; each one stays false until it is built on first use
local PatternCJK        = false
local PatternCombined   = false
local PatternLatin      = false
local PatternTerminated = false
local RangesLatin       = false
local SeekQuote         = false
local foreignModule = function ( access, advanced, append, alt, alert )
    -- Fetch global module
    -- Precondition:
    --     access    -- string, with name of base module
    --     advanced  -- true, for require(); else mw.loadData()
    --     append    -- string, with subpage part, if any; or false
    --     alt       -- number, of wikidata item of root; or false
    --     alert     -- true, for throwing error on data problem
    -- Postcondition:
    --     Returns whatever the loader delivered, probably table;
    --     nil if nothing could be loaded and alert is not set
    --     Throws error if nothing could be loaded and alert is set
    local storage = access
    local finer = function ()
                      -- Extend the page name by the subpage part
                      if append then
                          storage = string.format( "%s/%s",
                                                   storage,
                                                   append )
                      end
                  end
    local fun, lucky, r, suited
    if advanced then
        fun = require
    else
        fun = mw.loadData
    end
    -- Per base module: sitelink string once resolved, or true if the
    -- lookup has been tried already without result
    GlobalMod.globalModules = GlobalMod.globalModules or { }
    suited = GlobalMod.globalModules[ access ]
    if not suited then
        -- First attempt: local page in the Module: namespace
        finer()
        lucky, r = pcall( fun,  "Module:" .. storage )
    end
    if not lucky then
        if not suited  and
           type( alt ) == "number"  and
           alt > 0 then
            -- mw.wikibase is only present where the Wikibase client is
            -- installed; without it there is no sitelink to resolve
            if type( mw.wikibase ) == "table" then
                suited = mw.wikibase.getSitelink( string.format( "Q%d",
                                                                 alt ) )
            end
            GlobalMod.globalModules[ access ] = suited or true
        end
        if type( suited ) == "string" then
            -- Second attempt: page title as given by the sitelink
            storage = suited
            finer()
            lucky, r = pcall( fun, storage )
        end
        if not lucky then
            if alert then
                error( "Missing or invalid page: " .. storage, 0 )
            end
            -- After a failed pcall() r holds the error message;
            -- do not hand that to the caller as if it were the module
            r = nil
        end
    end
    return r
end -- foreignModule()
local function factoryQuote()
    -- Ensure Text.quoteLang and Text.quoteType are populated
    --     Does nothing if Text.quoteLang is present already.
    --     Tries the "quoting" subpage first, then fills in whatever
    --     is missing, so that an English entry is always available.
    local fallback, quoting
    if Text.quoteLang then
        return
    end
    quoting = foreignModule( "Text", false, "quoting", Text.item )
    if type( quoting ) == "table" then
        Text.quoteLang = quoting.langs
        Text.quoteType = quoting.types
    end
    if type( Text.quoteLang ) ~= "table" then
        Text.quoteLang = { }
    end
    if type( Text.quoteType ) ~= "table" then
        Text.quoteType = { }
    end
    if type( Text.quoteLang.en ) ~= "string" then
        Text.quoteLang.en = "ld"
    end
    fallback = Text.quoteLang.en
    if type( Text.quoteType[ fallback ] ) ~= "table" then
        -- codepoint pairs: level 1, then level 2
        Text.quoteType[ fallback ] = { { 8220, 8221 },
                                       { 8216, 8217 } }
    end
end -- factoryQuote()
local function fiatQuote( apply, alien, advance )
    -- Enclose text in the quotation marks defined for a language
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code
    --     advance  -- number, with level 1 or 2
    -- Returns: string; apply itself if no marks are defined
    local gap, kind, langs, marks, pair, r
    factoryQuote()
    langs = Text.quoteLang
    kind  = langs[ alien ]
    if not kind then
        -- retry with the part before the first hyphen, then English
        local base = alien:match( "^(%l+)-" )
        kind = ( base  and  langs[ base ] )  or  langs.en
    end
    if not kind then
        return apply
    end
    marks = Text.quoteType[ kind ]
    if not marks then
        mw.log( "fiatQuote() " .. kind )
        return apply
    end
    -- a third entry requests a space between marks and text
    if marks[ 3 ] then
        gap = " "
    else
        gap = ""
    end
    pair = marks[ advance ]
    if not pair then
        return apply
    end
    r = mw.ustring.format( "%s%s%s%s%s",
                           mw.ustring.char( pair[ 1 ] ),
                           gap,
                           apply,
                           gap,
                           mw.ustring.char( pair[ 2 ] ) )
    return r
end -- fiatQuote()
Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string
    --     the characters, repeated if requested;
    --     an error <span> if any item is invalid and accept is not set;
    --     "" if apply is not a table or nothing remains
    local r
    if type( apply ) == "table" then
        local bad   = { }
        local codes = { }
        local s
        for k, v in pairs( apply ) do
            -- s keeps the type name while the item is rejected,
            -- and becomes false once the item is accepted
            s = type( v )
            if s == "number" then
                -- Reject NaN, control characters other than TAB and LF
                -- (this includes negative numbers), and anything above
                -- 0x10FFFF, which is not a Unicode codepoint; such
                -- values are reported rather than passed on to
                -- mw.ustring.char()
                if v ~= v  or
                   v > 0x10FFFF  or
                   ( v < 32  and  v ~= 9  and  v ~= 10 ) then
                    v = tostring( v )
                else
                    v = math.floor( v )
                    s = false
                end
            elseif s ~= "string" then
                v = tostring( v )
            end
            if s then
                table.insert( bad, v )
            else
                table.insert( codes, v )
            end
        end -- for k, v
        if #bad == 0 then
            if #codes > 0 then
                r = mw.ustring.char( unpack( codes ) )
                if again then
                    if type( again ) == "number" then
                        local n = math.floor( again )
                        if n > 1 then
                            r = r:rep( n )
                        elseif n < 1 then
                            r = ""
                        end
                    else
                        s = "bad repetitions: " .. tostring( again )
                    end
                end
            end
        else
            s = "bad codepoints: " .. table.concat( bad, " " )
        end
        -- here s is either an error message, or false/nil
        if s  and  not accept then
            r = tostring(  mw.html.create( "span" )
                                  :addClass( "error" )
                                  :wikitext( s ) )
        end
    end
    return r or ""
end -- Text.char()
Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    --     Items are taken in ascending order of their numerical key;
    --     items which are empty after trimming are skipped.
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local collect = { }
    local order   = { }
    local v
    -- pairs() gives no guarantee about the order of keys, but the
    -- result depends on it; therefore gather numerical keys and sort
    for k in pairs( args ) do
        if type( k ) == "number" then
            order[ #order + 1 ] = k
        end
    end -- for k
    table.sort( order )
    for i = 1, #order do
        v = mw.text.trim( args[ order[ i ] ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end -- for i
    return table.concat( collect,  apply or "|" )
end -- Text.concatParams()
Text.containsCJK = function ( analyse )
    -- Does the string contain at least one CJK character?
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if a codepoint within 13312-40959 or
    --          131072-178207 has been found; else false
    if not PatternCJK then
        -- character class "[" range range "]", built once
        PatternCJK = mw.ustring.char( 91,
                                      13312, 45, 40959,
                                      131072, 45, 178207,
                                      93 )
    end
    return mw.ustring.find( analyse, PatternCJK ) ~= nil
end -- Text.containsCJK()
Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    --     Removed: HTML comments, tags starting with a lowercase
    --     letter, bold and italic apostrophes; &nbsp; becomes a
    --     space; finally mw.text.unstrip() is applied.
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local r = adjust
    local i = r:find( "<!--", 1, true )
    local j
    while i do
        j = r:find( "-->",  i + 3,  true )
        if j then
            -- keep everything before the "<" at i, and after "-->"
            r = r:sub( 1,  i - 1 ) .. r:sub( j + 3 )
        else
            -- unterminated comment: drop the remainder
            r = r:sub( 1,  i - 1 )
        end
        i = r:find( "<!--", i, true )
    end    -- "<!--"
    r = r:gsub( "(</?%l[^>]*>)", "" )
         :gsub( "'''(.+)'''", "%1" )
         :gsub( "''(.+)''", "%1" )
         :gsub( "&nbsp;", " " )
    return mw.text.unstrip( r )
end -- Text.getPlain()
Text.isLatinRange = function ( adjust )
    -- Does the string consist of nothing but codepoints listed in
    -- RangesLatin?
    --     Also builds RangesLatin and PatternLatin on first call.
    -- Precondition:
    --     adjust  -- string, or nil for initialization only
    -- Returns: true or false for a string; nil if adjust is not given
    if not RangesLatin then
        RangesLatin = { {    7,  687 },
                        { 7531, 7578 },
                        { 7680, 7935 },
                        { 8194, 8250 } }
    end
    if not PatternLatin then
        -- "^[" first-last first-last ... "]*$"
        local parts = { "^[" }
        local range
        for i = 1, #RangesLatin do
            range = RangesLatin[ i ]
            parts[ #parts + 1 ] = mw.ustring.char( range[ 1 ],
                                                   45,
                                                   range[ 2 ] )
        end    -- for i
        parts[ #parts + 1 ] = "]*$"
        PatternLatin = table.concat( parts )
    end
    if adjust then
        return mw.ustring.match( adjust, PatternLatin ) ~= nil
    end
    return nil
end -- Text.isLatinRange()
Text.isQuote = function ( ask )
    -- Is the given text contained in the list of quotation marks?
    -- Parameter:
    --     ask  -- string, with single character
    -- Returns: true, if ask is found literally within the list;
    --          false otherwise, and always for the empty string
    if not SeekQuote then
        -- all known marks in one string, built once
        SeekQuote = mw.ustring.char( 34,        -- "
                                     39,        -- '
                                     171,       -- laquo
                                     187,       -- raquo
                                     8216,      -- lsquo
                                     8217,      -- rsquo
                                     8218,      -- sbquo
                                     8220,      -- ldquo
                                     8221,      -- rdquo
                                     8222,      -- bdquo
                                     8249,      -- lsaquo
                                     8250,      -- rsaquo
                                     0x300C,    -- CJK
                                     0x300D,    -- CJK
                                     0x300E,    -- CJK
                                     0x300F )   -- CJK
    end
    if ask == "" then
        return false
    end
    return mw.ustring.find( SeekQuote, ask, 1, true ) ~= nil
end -- Text.isQuote()
Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    --     Items are taken in ascending order of their numerical key;
    --     items which are empty after trimming are skipped.
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local collect = { }
    local order   = { }
    local v
    -- pairs() gives no guarantee about the order of keys, but the
    -- result depends on it; therefore gather numerical keys and sort
    for k in pairs( args ) do
        if type( k ) == "number" then
            order[ #order + 1 ] = k
        end
    end -- for k
    table.sort( order )
    for i = 1, #order do
        v = mw.text.trim( args[ order[ i ] ] )
        if v ~= "" then
            if adapt then
                v = mw.ustring.format( adapt, v )
            end
            collect[ #collect + 1 ] = v
        end
    end -- for i
    return mw.text.listToText( collect )
end -- Text.listToText()
Text.quote = function ( apply, alien, advance )
    -- Enclose trimmed text in quotation marks
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local level = 1
    local slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- TODO FIXME: Introduction expected 2017-04
        --     content language code is used if the title object
        --     yields no pageLanguage
        slang = mw.title.getCurrentTitle().pageLanguage  or
                mw.language.getContentLanguage():getCode()
    end
    -- anything but exactly 2 means level 1
    if advance == 2 then
        level = 2
    end
    return fiatQuote( mw.text.trim( apply ), slang, level )
end -- Text.quote()
Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    --     The trimmed text counts as quoted already if its first or
    --     its last character is a quotation mark known to
    --     Text.isQuote().
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim( apply )
    if r ~= "" then
        local first = mw.ustring.sub( r, 1, 1 )
        -- last character: start at -1 and run to the end of the string
        local last  = mw.ustring.sub( r, -1 )
        if not Text.isQuote( first )  and
           not Text.isQuote( last ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()
Text.removeDiacritics = function ( adjust )
    -- Strip combining marks from a string
    --     The string is decomposed (NFD), every codepoint within the
    --     ranges listed below is deleted, and the remainder is
    --     composed again (NFC).
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local stripped
    if not PatternCombined then
        -- character class "[" range ... "]", built once
        PatternCombined = mw.ustring.char( 91,
                                           0x0300, 45, 0x036F,
                                           0x1AB0, 45, 0x1AFF,
                                           0x1DC0, 45, 0x1DFF,
                                           0xFE20, 45, 0xFE2F,
                                           93 )
    end
    -- assignment keeps the string only and drops the match count
    stripped = mw.ustring.gsub( mw.ustring.toNFD( adjust ),
                                PatternCombined,
                                "" )
    return mw.ustring.toNFC( stripped )
end -- Text.removeDiacritics()
Text.sentenceTerminated = function ( analyse )
    -- Does the trimmed string end with a terminating mark,
    -- optionally followed by quotation marks or closing brackets?
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated; else false
    local s = mw.text.trim( analyse )
    if not PatternTerminated then
        -- one terminator from the first class, then any number of
        -- characters from the second class up to the end
        local leading = mw.ustring.char( 91,
                                         12290,
                                         65281,
                                         65294,
                                         65311 )
        PatternTerminated = leading .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    return mw.ustring.find( s, PatternTerminated ) ~= nil
end -- Text.sentenceTerminated()
Text.ucfirstAll = function ( adjust )
    -- Capitalize every lowercase letter that follows a
    -- non-alphanumeric character (or starts the string)
    --     Some named entities are swapped for numerical ones while
    --     the letters are processed, and restored afterwards.
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with all first letters in upper case
    local entities = { { "&amp;",      "&#38;" },
                       { "&lt;",       "&#60;" },
                       { "&gt;",       "&#62;" },
                       { "&nbsp;",    "&#160;" },
                       { "&thinsp;", "&#8201;" },
                       { "&zwnj;",   "&#8204;" },
                       { "&zwj;",    "&#8205;" },
                       { "&lrm;",    "&#8206;" },
                       { "&rlm;",    "&#8207;" } }
    -- leading space lets the first word match "%W%l" as well
    local r      = " " .. adjust
    local masked = ( adjust:find( "&" ) ~= nil )
    local at     = 1
    local hit, upper
    if masked then
        for i = 1, #entities do
            r = r:gsub( entities[ i ][ 1 ],  entities[ i ][ 2 ] )
        end    -- for i
    end
    repeat
        hit = mw.ustring.find( r, "%W%l", at )
        if hit then
            upper = mw.ustring.upper( mw.ustring.sub( r,
                                                      hit + 1,
                                                      hit + 1 ) )
            r = string.format( "%s%s%s",
                               mw.ustring.sub( r, 1, hit ),
                               upper,
                               mw.ustring.sub( r,  hit + 2 ) )
            at = hit + 1
        end
    until not hit
    -- drop the leading space again
    r = r:sub( 2 )
    if masked then
        for i = 1, #entities do
            r = r:gsub( entities[ i ][ 2 ],  entities[ i ][ 1 ] )
        end    -- for i
        -- "x" of hexadecimal entities has been capitalized; undo
        r = r:gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll()
Text.uprightNonlatin = function ( adjust )
    -- Wrap runs of characters outside RangesLatin into a <span>
    -- with normal font style
    --     A run made of one single letter from 880-1023 (greek),
    --     possibly followed by ambivalent characters, is left alone.
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    -- initialization of RangesLatin and PatternLatin only
    Text.isLatinRange()
    if mw.ustring.match( adjust, PatternLatin ) then
        -- latin only, horizontal dashes, quotes
        return adjust
    end
    local span  = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
    local total = mw.ustring.len( adjust )
    local start = false    -- index where the open non-latin run began
    local done  = 1        -- first index not yet copied into r
    local greek = false    -- index at which a lone greek letter would end
    local code
    local isLatin = function ( a )
                        -- codepoint within any of RangesLatin?
                        for i = 1, #RangesLatin do
                            local range = RangesLatin[ i ]
                            if a >= range[ 1 ]  and  a <= range[ 2 ] then
                                return true
                            end
                        end    -- for i
                        return false
                    end -- isLatin()
    local decisive = function ( a )
                         -- char is not ambivalent
                         if a > 64 then
                             return ( a < 8192  or  a > 8212 )
                         end
                         return ( a == 38  or  a == 60 )    -- '&' '<'
                     end -- decisive()
    local wrap = function ( a )
                     -- result so far + pending latin part + <span>
                     -- around the open run, which ends at index a
                     return string.format( span,
                                           r,
                                           mw.ustring.sub( adjust,
                                                           done,
                                                           start - 1 ),
                                           mw.ustring.sub( adjust,
                                                           start,
                                                           a ) )
                 end -- wrap()
    r = ""
    for i = 1, total do
        code = mw.ustring.codepoint( adjust, i, i )
        if decisive( code ) then
            if isLatin( code ) then
                if start  and  greek then
                    if i == greek then
                        -- single greek letter.
                        start = false
                    end
                    greek = false
                end
                if start then
                    -- close the run; spaces and opening parentheses
                    -- right before this char stay outside the span
                    local stop = i - 1
                    local tail = ""
                    local single
                    while stop >= 1 do
                        single = mw.ustring.sub( adjust, stop, stop )
                        if single ~= " "  and  single ~= "(" then
                            break -- while stop
                        end
                        tail = single .. tail
                        stop = stop - 1
                    end -- while stop
                    r     = wrap( stop ) .. tail
                    start = false
                    done  = i
                end
            elseif not start then
                -- a non-latin run begins here
                start = i
                if code >= 880  and  code <= 1023 then
                    -- single greek letter?
                    greek = i + 1
                else
                    greek = false
                end
            end
        elseif greek then
            -- ambivalent char: postpone the end of a lone greek letter
            greek = greek + 1
        end
    end    -- for i
    if start  and  ( not greek  or  greek < total ) then
        r = wrap( total )
    else
        r = r .. mw.ustring.sub( adjust, done )
    end
    return r
end -- Text.uprightNonlatin()
Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version or "wikidata" or "~"
    --                 or false
    -- Postcondition:
    --     Returns  string  -- with queried version, also if problem
    --              false   -- if appropriate
    --     "wikidata": version registered at the item, if retrievable;
    --                 else the local serial
    --     "~":        false if the registered version equals the local
    --                 serial; else the registered version, or the local
    --                 serial if none is retrievable
    --     otherwise:  local serial if it is not older than atleast
    --                 (string comparison); else false
    local last  = ( atleast == "~" )
    local since = atleast
    local r
    if last  or  since == "wikidata" then
        local item = Failsafe.item
        since = false
        -- mw.wikibase is only present where the Wikibase client is
        -- installed; otherwise fall through to the local serial
        if type( item ) == "number"  and
           item > 0  and
           type( mw.wikibase ) == "table" then
            local entity = mw.wikibase.getEntity( string.format( "Q%d",
                                                                 item ) )
            if type( entity ) == "table" then
                local seek = Failsafe.serialProperty or "P348"
                local vsn  = entity:formatPropertyValues( seek )
                if type( vsn ) == "table"  and
                   type( vsn.value ) == "string"  and
                   vsn.value ~= "" then
                    if last  and  vsn.value == Failsafe.serial then
                        r = false
                    else
                        r = vsn.value
                    end
                end
            end
        end
    end
    -- r may be false deliberately; only nil means "not decided yet"
    if type( r ) == "nil" then
        if not since  or  since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()
Text.test = function ( about )
    -- Expose internal data for inspection
    -- Parameter:
    --     about  -- string; only "quote" is recognized
    -- Returns: table with the quote definitions, or nil
    if about ~= "quote" then
        return nil
    end
    factoryQuote()
    return { QuoteLang = Text.quoteLang,
             QuoteType = Text.quoteType }
end -- Text.test()
-- Export
--     Table that receives the frame-based wrapper functions and is
--     returned as the value of this module
local p = { }
function p.char( frame )
    -- Wrapper for Text.char()
    --     1       -- whitespace separated codepoints; decimal, or
    --                hexadecimal with leading "x"; taken from the
    --                parent frame, else from the own frame
    --     *       -- number of repetitions
    --     errors  -- "0" to suppress error messages
    local params = frame:getParent().args
    local story  = params[ 1 ]
    local codes, lenient, multiple
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    if story then
        local items = mw.text.split( story, "%s+" )
        if #items > 0 then
            lenient  = ( params.errors == "0" )
            multiple = tonumber( params[ "*" ] )
            codes    = { }
            for _, v in ipairs( items ) do
                -- empty pieces arise from leading or trailing space
                if v ~= "" then
                    local n
                    if v:sub( 1, 1 ) == "x" then
                        n = tonumber( "0" .. v )
                    else
                        n = tonumber( v )
                    end
                    -- unparsable items are kept as string, so that
                    -- Text.char() can report them
                    table.insert( codes,  n or v )
                end
            end -- for _, v
        end
    end
    return Text.char( codes, multiple, lenient )
end
function p.concatParams( frame )
    -- Wrapper for Text.concatParams()
    --     template   -- "1" to take the items from the parent frame
    --     separator  -- passed on as separator
    --     format     -- passed on as format
    local config = frame.args
    local source = config
    local parent = config.template
    if type( parent ) == "string" then
        parent = ( mw.text.trim( parent ) == "1" )
    end
    if parent then
        source = frame:getParent().args
    end
    return Text.concatParams( source, config.separator, config.format )
end
function p.containsCJK( frame )
    -- Wrapper for Text.containsCJK(); returns "1" or ""
    local story = frame.args[ 1 ] or ""
    if Text.containsCJK( story ) then
        return "1"
    end
    return ""
end
function p.getPlain( frame )
    -- Wrapper for Text.getPlain(); missing parameter 1 counts as ""
    local story = frame.args[ 1 ] or ""
    return Text.getPlain( story )
end
function p.isLatinRange( frame )
    -- Wrapper for Text.isLatinRange(); returns "1" or ""
    local story = frame.args[ 1 ] or ""
    if Text.isLatinRange( story ) then
        return "1"
    end
    return ""
end
function p.isQuote( frame )
    -- Wrapper for Text.isQuote(); returns "1" or ""
    local story = frame.args[ 1 ] or ""
    if Text.isQuote( story ) then
        return "1"
    end
    return ""
end
function p.listToFormat( frame )
    -- Apply a format string to the parallel items of several lists
    --     1, 2, ...  -- lists, each one a string of separated items
    --     format     -- string with one "%s" per list
    --     sep        -- separator pattern for the lists (default ";")
    -- Returns: string; the format repeated once per item position,
    --          with the n-th "%s" replaced by the item of the n-th
    --          list, or by nothing if that list is too short;
    --          "" if no format has been provided
    local pformat = frame.args[ "format" ]
    local sep     = frame.args[ "sep" ] or ";"
    local lists   = { }
    local lines   = { }
    local maxListLen = 0
    local count
    if type( pformat ) ~= "string" then
        return ""
    end
    -- Parse parameters: lists
    for k, v in pairs( frame.args ) do
        local knum = tonumber( k )
        if knum then lists[ knum ] = v end
    end
    -- Split lists
    count = #lists
    for i = 1, count do
        lists[ i ] = mw.text.split( lists[ i ], sep )
        if #lists[ i ] > maxListLen then maxListLen = #lists[ i ] end
    end
    -- Generate result
    for i = 1, maxListLen do
        local j = 0
        -- A function as replacement inserts the item literally, so a
        -- "%" within an item is not taken as capture reference, and a
        -- "%s" within an item is not consumed by the next list.
        lines[ i ] = mw.ustring.gsub( pformat,
                                      "%%s",
                                      function ()
                                          j = j + 1
                                          if j <= count then
                                              return lists[ j ][ i ] or ""
                                          end
                                          -- nil: surplus "%s" is kept
                                      end )
    end
    return table.concat( lines )
end
function p.listToText( frame )
    -- Wrapper for Text.listToText()
    --     template  -- "1" to take the items from the parent frame
    --     format    -- passed on as format
    local config = frame.args
    local source = config
    local parent = config.template
    if type( parent ) == "string" then
        parent = ( mw.text.trim( parent ) == "1" )
    end
    if parent then
        source = frame:getParent().args
    end
    return Text.listToText( source, config.format )
end
function p.quote( frame )
    -- Wrapper for Text.quote()
    --     1  -- text
    --     2  -- language code; blank counts as not provided
    --     3  -- level
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        -- false if nothing remains after trimming
        slang = ( slang ~= "" )  and  slang
    end
    return Text.quote( args[ 1 ] or "",
                       slang,
                       tonumber( args[ 3 ] ) )
end
function p.quoteUnquoted( frame )
    -- Wrapper for Text.quoteUnquoted()
    --     1  -- text
    --     2  -- language code; blank counts as not provided
    --     3  -- level
    local args  = frame.args
    local slang = args[ 2 ]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        -- false if nothing remains after trimming
        slang = ( slang ~= "" )  and  slang
    end
    return Text.quoteUnquoted( args[ 1 ] or "",
                               slang,
                               tonumber( args[ 3 ] ) )
end
function p.removeDiacritics( frame )
    -- Wrapper for Text.removeDiacritics(); missing parameter 1 is ""
    local story = frame.args[ 1 ] or ""
    return Text.removeDiacritics( story )
end
function p.sentenceTerminated( frame )
    -- Wrapper for Text.sentenceTerminated(); returns "1" or ""
    local story = frame.args[ 1 ] or ""
    if Text.sentenceTerminated( story ) then
        return "1"
    end
    return ""
end
function p.ucfirstAll( frame )
    -- Wrapper for Text.ucfirstAll(); missing parameter 1 counts as ""
    local story = frame.args[ 1 ] or ""
    return Text.ucfirstAll( story )
end
function p.unstrip( frame )
    -- Apply mw.text.unstrip() to parameter 1, then trim the result
    local story    = frame.args[ 1 ] or ""
    local unveiled = mw.text.unstrip( story )
    return mw.text.trim( unveiled )
end
function p.uprightNonlatin( frame )
    -- Wrapper for Text.uprightNonlatin(); missing parameter 1 is ""
    local story = frame.args[ 1 ] or ""
    return Text.uprightNonlatin( story )
end
function p.zip( frame )
    -- Interleave the items of several lists
    --     1, 2, ...       -- lists, each one a string of separated items
    --     sep             -- default separator pattern of the lists
    --     sep1, sep2, ... -- separator pattern of a particular list
    --     isep            -- inserted between items of the same position
    --     osep            -- inserted between positions
    -- Returns: string; lists running short contribute nothing
    local args       = frame.args
    local defaultsep = args[ "sep" ] or ""
    local innersep   = args[ "isep" ] or ""
    local outersep   = args[ "osep" ] or ""
    local lists      = { }
    local seps       = { }
    local rows       = { }
    local longest    = 0
    -- Parse parameters
    for k, v in pairs( args ) do
        local knum = tonumber( k )
        if knum then
            lists[ knum ] = v
        elseif string.sub( k, 1, 3 ) == "sep" then
            local sepnum = tonumber( string.sub( k, 4 ) )
            if sepnum then
                seps[ sepnum ] = v
            end
        end
    end -- for k, v
    -- use the default separator where no explicit one has been given
    for i = 1, math.max( #seps, #lists ) do
        seps[ i ] = seps[ i ] or defaultsep
    end -- for i
    -- Split lists
    for i = 1, #lists do
        lists[ i ] = mw.text.split( lists[ i ], seps[ i ] )
        longest    = math.max( longest, #lists[ i ] )
    end -- for i
    -- One row per item position, one cell per list
    for i = 1, longest do
        local cells = { }
        for j = 1, #lists do
            cells[ j ] = lists[ j ][ i ] or ""
        end -- for j
        rows[ i ] = table.concat( cells, innersep )
    end -- for i
    return table.concat( rows, outersep )
end
p.failsafe = function ( frame )
    -- Versioning interface
    --     frame  -- table with args[ 1 ], or string, with the request
    -- Returns: string; "" where Failsafe.failsafe() yields false
    local kind = type( frame )
    local since
    if kind == "table" then
        since = frame.args[ 1 ]
    elseif kind == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        -- false if nothing remains after trimming
        since = ( since ~= "" )  and  since
    end
    return Failsafe.failsafe( since )  or  ""
end -- p.failsafe()
function p.Text()
    -- Hand out the library table with all Text.* functions
    return Text
end -- p.Text
return p