模組 : String2

此模块已评为 alpha版，可接受第三方输入，并可用于少量页面以确认是否存在问题，但需要受到监视。欢迎提供新功能或修改其输入输出机制的建议。

The module “String2” contains 4 available calls that convert strings to upper, lower, sentence or title case.

The sentence case function finds the first letter and capitalises that, so it works properly with text containing wiki-markup. Compare {{#invoke:String2|sentence|[[action game]]}} -> Action game with {{ucfirst:{{lc:[[action game]]}}}} -> action game . Piped wiki-links are handled as well: {{#invoke:String2|sentence|[[trimix (breathing gas)|trimix]]}} -> Trimix .

The title case function capitalises the first letter of each word in the text, apart from a number of short words recommended by The U.S. Government Printing Office Style Manual.

Further functions commonly used on strings would be useful additions.

用法

{{#invoke:String2 | upper |…}}
{{#invoke:String2 | lower |…}}
{{#invoke:String2 | sentence |…}}
{{#invoke:String2 | title |…}}

參數

Just one unnamed parameter is used, representing the text to be converted to the required case.

範例

Input	Output
{{#invoke:String2\| upper \| abcd }}	脚本错误：函数“upper”不存在。
{{#invoke:String2\| upper \| abCD }}	脚本错误：函数“upper”不存在。
{{#invoke:String2\| upper \| ABcd }}	脚本错误：函数“upper”不存在。
{{#invoke:String2\| upper \| ABCD }}	脚本错误：函数“upper”不存在。
{{#invoke:String2\| upper \| }}	脚本错误：函数“upper”不存在。

{{#invoke:String2\| lower \| abcd }}	脚本错误：函数“lower”不存在。
{{#invoke:String2\| lower \| abCD }}	脚本错误：函数“lower”不存在。
{{#invoke:String2\| lower \| ABcd }}	脚本错误：函数“lower”不存在。
{{#invoke:String2\| lower \| ABCD }}	脚本错误：函数“lower”不存在。
{{#invoke:String2\| lower \| }}	脚本错误：函数“lower”不存在。

{{#invoke:String2\| sentence \| abcd }}	Abcd
{{#invoke:String2\| sentence \| abCD }}	Abcd
{{#invoke:String2\| sentence \| ABcd }}	Abcd
{{#invoke:String2\| sentence \| ABCD }}	Abcd
{{#invoke:String2\| sentence \| [[action game]] }}	Action game
{{#invoke:String2\| sentence \| [[trimix (breathing gas)\|trimix]] }}	Trimix
{{#invoke:String2 \| sentence \| {{#invoke:WikidataIB \|getValue \|P136 \|name=genre \|fetchwikidata=ALL \|qid=Q1396889}} }}	影射小說、寓言
{{#invoke:String2\| sentence \| }}

{{#invoke:String2\| title \| abcd }}	Abcd
{{#invoke:String2\| title \| abCD }}	Abcd
{{#invoke:String2\| title \| ABcd }}	Abcd
{{#invoke:String2\| title \| ABCD }}	Abcd
{{#invoke:String2\| title \| }}
{{#invoke:String2\| title \| The Vitamins Are In My Fresh California Raisins}}	The Vitamins Are in My Fresh California Raisins

參見

Module:String for the following functions:

len
sub
sublength
match
pos
str_find
find
replace
rep

上述文档 嵌入自 Module:String2/doc 。 ( 编辑 | 历史 )
编者可以在本模块的沙盒 ( 创建 | 镜像 ) 和测试样例 ( 创建 ) 页面进行实验。
本模块的子页面。

local
 p
 =
 {}


-- trim returns the first unnamed parameter with leading and trailing
-- whitespace removed; a missing parameter is treated as the empty string.
function p.trim(frame)
	local raw = frame.args[1]
	if not raw then
		raw = ""
	end
	return mw.text.trim(raw)
end


-- sentence lower-cases the first unnamed parameter and then capitalises its
-- first letter. The lower-cased text is written back into frame.args[1] so
-- that p.ucfirst can be handed the same frame.
function p.sentence(frame)
	local args = frame.args
	-- {{lc:}} is strip-marker safe, string.lower is not.
	local lowered = frame:callParserFunction('lc', args[1])
	args[1] = lowered
	return p.ucfirst(frame)
end


-- ucfirst upper-cases the first letter found in the first unnamed parameter,
-- leaving everything else untouched.
--  * If the text contains "<li>", everything up to and including the first
--    "<li>" is set aside and re-attached unchanged in front of the result.
--  * If the remaining text starts with a piped wikilink ([[target|label]]),
--    the first letter after the pipe is capitalised, not the link target.
--  * If no letter is found the text is returned as it was (trimmed).
function p.ucfirst(frame)
	local text = mw.text.trim(frame.args[1] or "")

	-- split off any list markup that precedes the first list item
	local prefix = ""
	local li_start = mw.ustring.find(text, "<li>")
	if li_start then
		prefix = mw.ustring.sub(text, 1, li_start + 3)   -- "<li>" is 4 characters long
		text = mw.ustring.sub(text, li_start + 4)
	end

	-- locate the letter to capitalise
	local letter_at
	if mw.ustring.find(text, "^%[%[[^|]+|[^%]]+%]%]") then
		-- piped wikilink: take the end position of "pipe, non-letters, letter"
		local _, match_end = mw.ustring.find(text, "|%A*%a")
		letter_at = match_end
	else
		local match_start = mw.ustring.find(text, '%a')
		letter_at = match_start
	end

	if not letter_at then
		return prefix .. text
	end

	local before = mw.ustring.sub(text, 1, letter_at - 1)
	local capital = mw.ustring.upper(mw.ustring.sub(text, letter_at, letter_at))
	local after = mw.ustring.sub(text, letter_at + 1)
	return prefix .. before .. capital .. after
end


-- title converts the first unnamed parameter to title case.
-- The text is trimmed and split on single spaces; every word is lower-cased
-- and then given an initial capital, except for the short words listed below,
-- which stay lower-case unless they are the first word.
--
-- http://grammar.yourdictionary.com/capitalization/rules-for-capitalization-in-titles.html
-- recommended by The U.S. Government Printing Office Style Manual:
-- "Capitalize all words in titles of publications and documents,
-- except a, an, the, at, by, for, in, of, on, to, up, and, as, but, or, and nor."
-- (The list used here additionally contains 'from' and does not contain 'as'.)
function p.title(frame)
	local always_lower = {}
	for _, word in ipairs({
		'a', 'an', 'the',
		'and', 'but', 'or', 'for',
		'nor', 'on', 'in', 'at', 'to',
		'from', 'by', 'of', 'up',
	}) do
		always_lower[word] = true
	end

	-- fetch the content-language object once rather than once per word
	local lang = mw.getContentLanguage()

	local text = mw.text.trim(frame.args[1] or "")
	local words = mw.text.split(text, " ")
	for i, word in ipairs(words) do
		-- {{lc:}} is strip-marker safe, string.lower is not.
		word = frame:callParserFunction('lc', word)
		if i == 1 or not always_lower[word] then
			word = lang:ucfirst(word)
		end
		words[i] = word
	end
	return table.concat(words, " ")
end


-- findlast returns the last item of a delimited list.
-- First unnamed parameter: the list (leading/trailing whitespace is trimmed).
-- Second unnamed parameter (optional): the separator; when it is missing or
-- empty the separator is comma-space.
-- The separator is spliced into a Lua pattern as-is, so any pattern magic
-- characters it contains keep their pattern meaning.
-- Returns the whole list when the separator is not found.
function p.findlast(frame)
	local list = mw.text.trim(frame.args[1] or "")

	local sep = frame.args[2]
	if sep == nil or sep == "" then
		sep = ", "
	end

	-- the greedy leading ".*" pushes the match to the last separator,
	-- leaving the final item in the capture
	local found, _, tail = list:find(".*" .. sep .. "(.*)")
	if not found then
		return list
	end
	return tail
end


-- stripZeros finds the first run of digits and strips its leading zeros
-- (always keeping at least one digit).
-- e.g "0940" -> "940"; "Year: 0023" -> "Year: 23"; "00.12" -> "0.12"
-- Text without any digits is returned unchanged (trimmed).
--
-- The zeros are removed with string operations rather than by converting the
-- run to a number and back: Lua 5.1 formats numbers with "%.14g", so a long
-- digit run such as "0123456789012345" would come back in exponent notation.
p.stripZeros = function(frame)
	local s = mw.text.trim(frame.args[1] or "")
	local stripped = string.gsub(s, "%d+", function(digits)
		local significant = string.gsub(digits, "^0+", "")
		if significant == "" then
			-- the run was all zeros: keep a single "0"
			return "0"
		end
		return significant
	end, 1)   -- only the first run of digits is touched
	return stripped
end


-- nowiki makes a piece of text safe to display literally: it takes the first
-- unnamed parameter (empty string if missing), trims surrounding whitespace
-- and passes the result through mw.text.nowiki.
function p.nowiki(frame)
	local raw = frame.args[1]
	if not raw then
		raw = ""
	end
	local trimmed = mw.text.trim(raw)
	return mw.text.nowiki(trimmed)
end


-- split cuts text at each occurrence of a separator and returns one chunk.
-- #invoke:String2 |split |text |separator |index |true/false
-- #invoke:String2 |split |txt=text |sep=separator |idx=index |plain=true/false
--  * text / txt: the text to split; if neither is supplied on the #invoke,
--    the parameters of the calling template are used instead. Empty text
--    returns nil.
--  * separator / sep: any double-quote characters are removed from it first.
--  * index / idx: chunk to return, starting at 1 (default 1); a negative
--    index counts back from the last chunk.
--  * plain: a value starting with "f", "n" or "0" makes the separator a Lua
--    pattern; anything else (and the default) means plain text.
function p.split(frame)
	local args = frame.args
	if not (args[1] or args.txt) then
		args = frame:getParent().args
	end

	local txt = args[1] or args.txt or ""
	if txt == "" then
		return nil
	end

	local raw_sep = args[2] or args.sep or ""
	local sep = raw_sep:gsub('"', '')   -- keep only the string, drop the count

	local idx = tonumber(args[3] or args.idx) or 1

	local flag = (args[4] or args.plain or "true"):sub(1, 1)
	local plain = not (flag == "f" or flag == "n" or flag == "0")

	local chunks = mw.text.split(txt, sep, plain)
	if idx < 0 then
		idx = #chunks + idx + 1
	end
	return chunks[idx]
end


-- val2percent rewrites every number in a string as a percentage.
-- The text is the first unnamed parameter or |txt=; if neither is supplied on
-- the #invoke, the parameters of the calling template are used. Empty text
-- returns nil.
-- A "number" is a digit followed by any run of digits and full stops; each one
-- is multiplied by 100 and followed by "%". A run that tonumber cannot parse
-- (for example "1.2.3") is rendered as "0%".
function p.val2percent(frame)
	local args = frame.args
	if not (args[1] or args.txt) then
		args = frame:getParent().args
	end

	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then
		return nil
	end

	local converted = txt:gsub("%d[%d%.]*", function(numeral)
		local pct = (tonumber(numeral) or 0) * 100
		local whole = math.floor(pct)
		if pct == whole then
			pct = whole
		end
		return pct .. "%"
	end)
	-- gsub also returns a replacement count; only the string is passed on
	return converted
end


-- one2a scans through a string, passed as either the first unnamed parameter or |txt=
-- (falling back to the calling template's parameters when neither is given),
-- and turns the word 'one' / 'One' into 'a' / 'A', then into 'an' / 'An' when
-- the following word starts with a lower-case vowel letter.
-- Returns nil for empty text.
--
-- The frontier pattern %f[...] anchors each match to a word boundary, so the
-- letters of longer words are left alone: "oneself" and "ones" are not
-- rewritten, and a word that merely ends in "a" ("DNA evidence",
-- "pizza and") no longer gains a stray "n".
-- The a/an choice is made on spelling only (a, e, i, o, u), not pronunciation.
p.one2a = function(frame)
	local args = frame.args
	if not (args[1] or args.txt) then
		args = frame:getParent().args
	end

	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then
		return nil
	end

	txt = txt
		:gsub(" one ", " a ")                   -- 'one' between spaces
		:gsub("^one%f[%A]", "a")                -- 'one' as the first word of the text
		:gsub("%f[%a]One ", "A ")               -- capitalised 'One' as a whole word
		:gsub("%f[%a]a ([aeiou])", "an %1")     -- 'a' before a vowel becomes 'an'
		:gsub("%f[%a]A ([aeiou])", "An %1")     -- 'A' before a vowel becomes 'An'
	return txt
end


-- [[Special:Diff/82782106]]: proposal approved after the public notice period; implemented here
-- _findpagetext returns the position of a piece of text in a page (for use from Lua)
-- args[1] or args.text is the search text (trimmed); empty text returns nil
-- args.title (optional) is the page title, defaults to the current page
-- args.plain (optional): a value starting with "f" selects a Lua pattern
--   search, anything else (and the default) a plain search
-- args.nomatch (optional) is the return value when no match is found or the
--   page content cannot be read; default is nil
function p._findpagetext(args)
	-- value to hand back when nothing is found; empty means nil
	local nomatch = args.nomatch
	if not nomatch or nomatch == "" then
		nomatch = nil
	end

	local text = mw.text.trim(args[1] or args.text or "")
	if text == "" then
		return nil
	end

	-- which page to search
	local title = args.title
	local titleobj
	if title and title ~= "" then
		titleobj = mw.title.new(title)
	else
		titleobj = mw.title.getCurrentTitle()
	end

	-- plain search unless |plain= starts with "f"
	local flag = args.plain or ""
	local plain = flag:sub(1, 1) ~= "f"

	-- get the page content and look for 'text'
	local content = titleobj and titleobj:getContent()
	if content then
		local position = mw.ustring.find(content, text, 1, plain)
		if position then
			return position
		end
	end
	return nomatch
end

-- findpagetext is the #invoke entry point for p._findpagetext.
-- Parameters of the calling template are copied over the #invoke parameters
-- (so the template's values win on a clash). Returns nil when no search text
-- (first unnamed parameter or |text=) is present.
function p.findpagetext(frame)
	local args = frame.args
	for key, value in pairs(frame:getParent().args) do
		args[key] = value
	end

	if args[1] or args.text then
		-- the parentheses keep the result to a single value
		return (p._findpagetext(args))
	end
	return nil
end


-- _urldecode returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}}
-- Type is:
--   QUERY decodes + to space (default)
--   PATH does no extra decoding
--   WIKI decodes _ to space
-- Only the exact strings "PATH" and "WIKI" are passed on to mw.uri.decode;
-- any other type is replaced by false. A nil url is treated as "".
-- Note: the parameter named 'type' hides the built-in type() inside this function.
function p._urldecode(url, type)
	if type ~= "PATH" and type ~= "WIKI" then
		type = false
	end
	return mw.uri.decode(url or "", type)
end

-- {{#invoke:String2|urldecode|url=url|type=type}}
-- #invoke entry point for p._urldecode.
--   url:  the text to decode; a missing |url= is decoded as the empty string
--         instead of being handed to mw.uri.decode as nil
--   type: QUERY (default), PATH or WIKI; it is upper-cased here so that
--         e.g. type=path is recognised by p._urldecode's case-sensitive
--         comparison, and an empty or unrecognised type falls back to the
--         default
p.urldecode = function(frame)
	local args = frame.args
	return p._urldecode(args.url, string.upper(args.type or ""))
end


-- what follows was merged from Module:StringFunc


-- helper functions

p
.
_GetParameters
 =
 require
(
'Module:GetParameters'
)


-- Argument list helper function, as per Module:String

p
.
_getParameters
 =
 p
.
_GetParameters
.
getParameters


-- Escape Pattern helper function, as per Module:String: puts a "%" in front of
-- every Lua pattern magic character so the text is matched literally.
-- Returns the results of mw.ustring.gsub unchanged, i.e. the escaped string
-- followed by the number of characters that were escaped.
function p._escapePattern(pattern_str)
	local magic_chars = "([%(%)%.%%%+%-%*%?%[%^%$%]])"
	local escaped_form = "%%%1"
	return mw.ustring.gsub(pattern_str, magic_chars, escaped_form)
end


-- Helper Function to interpret boolean strings, as per Module:String

p
.
_getBoolean
 =
 p
.
_GetParameters
.
getBoolean


--[[
Strip

This function strips characters from a string.

Usage:
{{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}}

Parameters
	source: The string to strip
	chars:  The pattern or list of characters to strip from string, replaced with ''
	plain:  A flag indicating that the chars should be understood as plain text. defaults to true.

Leading and trailing whitespace is also automatically stripped from the string.
If the source or the character list is empty, the trimmed source is returned as is.
]]
function p.strip( frame )
	local new_args = p._getParameters( frame.args, {'source', 'chars', 'plain'} )
	local source_str = mw.text.trim( new_args['source'] or '' )
	local chars = new_args['chars'] or ''

	if source_str == '' or chars == '' then
		return source_str
	end

	-- default to plain only when the flag is absent; "x or true" would also
	-- turn an explicit boolean false into true
	local plain_arg = new_args['plain']
	if plain_arg == nil then
		plain_arg = true
	end

	if p._getBoolean( plain_arg ) then
		-- assignment keeps just the escaped string, not the replacement count
		chars = p._escapePattern( chars )
	end

	-- chars is used as the body of a character set, so every listed
	-- character is removed wherever it occurs
	local result = mw.ustring.gsub( source_str, "[" .. chars .. "]", '' )
	return result
end


--[[
Match any

Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered.
Returns the empty string if nothing matches for use in {{#if:}}

Usage:
	{{#invoke:String2|matchAny|source=123 abc|456|abc}} returns '2'.

Parameters:
	source: the string to search (mandatory; an error is raised when it is missing)
	plain:  A flag indicating that the patterns should be understood as plain text. defaults to true.
	1, 2, 3, ...: the patterns to search for
]]
function p.matchAny( frame )
	local args = frame.args
	local source_str = args['source'] or error( 'The source parameter is mandatory.' )

	-- default to plain only when the flag is absent; "x or true" would also
	-- turn an explicit boolean false into true
	local plain_arg = args['plain']
	if plain_arg == nil then
		plain_arg = true
	end
	local l_plain = p._getBoolean( plain_arg )

	-- walk the numbered parameters until the first gap
	for i = 1, math.huge do
		local pattern = args[i]
		if not pattern then
			return ''
		end
		if mw.ustring.find( source_str, pattern, 1, l_plain ) then
			return tostring( i )
		end
	end
end


--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to an en dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported: a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported: 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list; the items are re-joined with
a comma followed by spacing (a single space when spacing is not supplied).

An item, or the whole of str, wrapped in doubled parentheses ((...)) is
accepted as written: the markup is removed and the text is not converted.

The en dash and em dash are spelled as UTF-8 byte escapes so that they survive
a re-encoding of this file; stored as literal characters they had been turned
into "?", which made "4-5" come out as "4?5".

]]
function p.hyphen_to_dash( str, spacing )
	if (str == nil or str == '') then
		return str
	end

	spacing = spacing or ' '													-- calling from Lua without spacing must not fail on the concatenation below

	local EN_DASH = '\226\128\147'												-- U+2013 EN DASH
	local EM_DASH = '\226\128\148'												-- U+2014 EM DASH

	local accept

	str = mw.text.decode( str, true )											-- replace html entities with their characters; semicolon mucks up the text.split

	local out = {}
	local list = mw.text.split( str, '%s*[,;]%s*' )								-- split str at comma or semicolon separators if there are any

	for _, item in ipairs( list ) do											-- for each item in the list
		item = mw.text.trim( item )												-- trim whitespace
		item, accept = item:gsub( '^%(%((.+)%)%)$', '%1' )						-- remove accept-this-as-written markup from the item; accept counts the removals
		if accept == 0 and mw.ustring.match( item, '^%w*[%.%-]?%w+%s*[%-' .. EN_DASH .. EM_DASH .. ']%s*%w*[%.%-]?%w+$' ) then	-- if a hyphenated range or has endash or emdash separators
			if item:match( '^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$' ) or			-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match( '^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$' ) or			-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match( '^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$' ) or			-- digit separator digit hyphen digit separator digit
				item:match( '^%d+%s*%-%s*%d+$' ) or								-- digit hyphen digit
				item:match( '^%a+%s*%-%s*%a+$' ) then							-- letter hyphen letter
					item = item:gsub( '(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1' .. EN_DASH .. '%2' )	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub( item, '%s*[' .. EN_DASH .. EM_DASH .. ']%s*', EN_DASH )	-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert( out, item )												-- add the (possibly modified) item to the output table
	end

	local temp_str = table.concat( out, ',' .. spacing )						-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub( '^%(%((.+)%)%)$', '%1' )					-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub( '^%(%((.+)%)%)$', '%1' )							-- when global markup removed, return original str; the assignment drops gsub's second return value
	end
	return temp_str
end


-- hyphen2dash is the #invoke entry point for p.hyphen_to_dash.
-- First unnamed parameter: the text (empty string if missing).
-- Second unnamed parameter: the spacing placed after each comma in the output.
-- It defaults to a single space, which is part of the standard separator for
-- normal spacing (in conjunction with templates r/rp/ran a narrower spacing
-- may be needed).
function p.hyphen2dash( frame )
	local args = frame.args
	return p.hyphen_to_dash( args[1] or '', args[2] or ' ' )
end


-- Similar to [[Module:String#endswith]]
-- startswith returns 'yes' when the first unnamed parameter begins with the
-- second unnamed parameter, and the empty string otherwise.
-- A missing parameter is treated as the empty string rather than raising a
-- script error; an empty prefix therefore always yields 'yes'.
-- The comparison is made on the raw bytes of the two strings.
function p.startswith( frame )
	local text = frame.args[1] or ''
	local prefix = frame.args[2] or ''
	if text:sub( 1, prefix:len() ) == prefix then
		return 'yes'
	end
	return ''
end


return
 p