Module:DateI18n: Difference between revisions

From WWII Archives

m (1 revision imported)
wc>Jarekt
Line 8: Line 8:
This module is intended for processing of date strings.
This module is intended for processing of date strings.


Please do not modify this code without applying the changes first at Module:Date/sandbox and testing  
Please do not modify this code without applying the changes first at Module:DateI18n/sandbox
at Module:Date/sandbox/testcases and Module talk:Date/sandbox/testcases.
and testing at Module:DateI18n/sandbox/testcases and Module talk:DateI18n/sandbox/testcases.


Authors and maintainers:
Authors and maintainers:
* User:Parent5446 - original version of the function mimicking template:ISOdate
* User:Parent5446 - original version of the function mimicking Template:ISOdate
* User:Jarekt - original version of the functions mimicking template:Date  
* User:Jarekt - original version of the functions mimicking Template:Date  
]]
]]
-- =======================================
-- === Dependencies ======================
-- =======================================
require('strict')
require('strict')


-- ==================================================
-- =======================================
-- === Internal functions ===========================
-- === Local Functions ===================
-- ==================================================
-- =======================================


-- Function allowing for consistent treatment of boolean-like wikitext input.
------------------------------------------------------------------------------
-- It works similarly to Module:Yesno
--[[ (copied from Module:Core)
Function allowing for consistent treatment of boolean-like wikitext input.
Inputs:
  1) val - value to be evaluated, outputs as a function of values:
true  : true  (boolean), 1 (number), or strings: "yes", "y", "true", "1"
false : false (boolean), 0 (number), or strings: "no", "n", "false", "0"
  2) default - value to return otherwise
See Also: It works similarly to Module:Yesno
]]
local function yesno(val, default)
local function yesno(val, default)
if type(val) == 'boolean' then
if type(val) == 'boolean' then
return val
return val
elseif type(val) == 'number' then
elseif type(val) == 'number' then
if val==1 then  
val = tostring(val)
return true
end
elseif val==0 then
if type(val) == 'string' then
return false
local LUT = {
yes=true , y=true , ['true'] =true , t=true , ['1']=true , on =true,
no =false, n=false, ['false']=false, f=false, ['0']=false, off=false }
    val = LUT[mw.ustring.lower(val)]  -- put in lower case
    if (val~=nil) then
return val
end
end
elseif type(val) == 'string' then
    val = mw.ustring.lower(val)  -- put in lower case
    if val == 'no'  or val == 'n' or val == 'false' or tonumber(val) == 0 then
        return false
    elseif val == 'yes' or val == 'y' or val == 'true'  or tonumber(val) == 1 then
        return true
    end
     end
     end
     return default
     return default
end
end


---------------------------------------------------------------------------------------
-- String replacement that ignores part of the string in "..."
local function strReplace(String, old, new)
if String:find('"') then
local T={}
for i, str in ipairs(mw.text.split( String, '"', true )) do
if i%2==1 then
str = str:gsub(old, new)
end
table.insert(T, str)
end
return table.concat(T,'"')
else
return String:gsub(old, new)
end
end
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datevec - Array of {year,month,day,hour,minute,second, tzhour, tzmin} containing broken
--    down date-time component strings or numbers
-- OUTPUT:
--  * datecode - a code specifying content of the array where Y' is year, 'M' is month,
--    'D' is day, 'H' is hour, 'M' minute, 'S' is second. output has to be one of YMDHMS, YMDHM, YMD, YM, MD, Y
--  * datenum - same array but holding only numbers or nuls
local function parserDatevec(datevec)
-- if month is not a number than check if it is a month name in project's language
local month = datevec[2]
if month and month~='' and not tonumber(month) then
datevec[2] = mw.getContentLanguage():formatDate( "n", month)
end
-- create datecode based on which variables are provided and check for out-of-bound values
local maxval = {nil, 12, 31, 23, 59, 59,  23, 59} -- max values for year, month, ...
local minval = {nil,  1,  1,  0,  0,  0, -23,  0} -- min values for year, month, ...
local c = {'Y', 'M', 'D', 'H', 'M', 'S', '', ''}
local datecode = '' -- a string signifying which combination of variables was provided
local datenum = {}  -- date-time encoded as a vector = [year, month, ... , second]
for i = 1,8 do
datenum[i] = tonumber(datevec[i])
if datenum[i] and (i==1 or (datenum[i]>=minval[i] and datenum[i]<=maxval[i])) then
datecode = datecode .. c[i]
end
end
return datecode, datenum
end
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datecode - a code specifying content of the array where Y' is year, 'M' is month,
--    'D' is day, 'H' is hour, 'M' minute, 'S' is second. output has to be one of YMDHMS, YMDHM, YMD, YM, MD, Y
--  * datenum - Array of {year,month,day,hour,minute,second, tzhour, tzmin} as numbers or nuls
-- OUTPUT:
--  * timeStamp - date string in the format taken by mw.language:formatDate lua function and {{#time}} perser function
--      https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate
--      https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time
--  * datecode - with possible corrections
local function getTimestamp(datecode, datenum)
-- create time stamp string (for example 2000-02-20 02:20:20) based on which variables were provided
local timeStamp
if datecode == 'YMDHMS' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5], datenum[6] )
elseif datecode == 'YMDHM' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5] )
elseif datecode:sub(1,3)=='YMD' then
timeStamp = string.format('%04i-%02i-%02i', datenum[1], datenum[2], datenum[3] )
datecode = 'YMD' -- 'YMD', 'YMDHMS' and 'YMDHM' are the only supported format starting with 'YMD'. All others will be converted to 'YMD'
elseif datecode == 'YM' then
timeStamp = string.format('%04i-%02i', datenum[1], datenum[2] )
elseif datecode:sub(1,1)=='Y' then
timeStamp = string.format('%04i', datenum[1] )
datecode = 'Y'
elseif datecode == 'M' then
timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], 1 )
elseif datecode == 'MD' then
timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], datenum[3] )
else
timeStamp = nil -- format not supported
end
return timeStamp, datecode
end


---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
Line 159: Line 87:
-- Look up proper format string to be passed to {{#time}} parser function
-- Look up proper format string to be passed to {{#time}} parser function
-- INPUTS:
-- INPUTS:
--  * datecode: YMDHMS, YMDHM, YMD, YM, MD, Y, or M
--  * datecode: YMDhms, YMDhm, YMD, YM, Y, MDhms, MDhm, MD, or M
--  * day    : Number between 1 and 31 (not needed for most languages)
--  * day    : Number between 1 and 31 (not needed for most languages)
--  * lang    : language
--  * lang    : language
Line 167: Line 95:
local function parseFormat(dFormat, day)
local function parseFormat(dFormat, day)
if dFormat:find('default') and #dFormat>10 then
if dFormat:find('default') and #dFormat>10 then
-- special (and messy) case of dFormat code depending on a day number
-- Special (and messy) case of dFormat code depending on a day number, where data is a
-- then json contains a string with more json containing "default" field and 2 digit day keys
-- JSON-encoded table {”default”:”*”,”dDD”:”*”} including fields for specific 2-digit days.
-- if desired day is not in that json than use "default" case
-- Change curly double quotes (possibly used for easier editing in tabular data) in dFormat
dFormat = dFormat:gsub('”','"') -- change fancy double quote to a straight one, used for json marking
-- to straight ASCII double quotes (required for parsing of this JSON-encoded table).
local D = mw.text.jsonDecode( dFormat ) --com = mw.dumpObject(D)
local D = mw.text.jsonDecode(mw.ustring.gsub(dFormat, '[„“‟”]', '"')) --com = mw.dumpObject(D)
day = string.format('d%02i',day) -- create day key
-- If the desired day is not in that JSON table, then use its "default" case.
dFormat = D[day] or D.default
dFormat = D[string.format('d%02i', day)] or D.default
dFormat = dFormat:gsub("'", '"') -- change single quote to a double quote, used for {{#time}} marking
            -- Change ASCII single quotes to ASCII double quotes used for {{#time}} marking.
            -- Apostrophes needed in plain-text must not use ASCII single quotes but curly apostrophe
            -- e.g. { ‟default”: ‟j”, ‟d01”: ‟j’'o'” }, not { ‟default”: ‟j”, ‟d01”: ‟j''o'” }.
end
end
dFormat = dFormat:gsub("'", '"')
return dFormat
return dFormat
end
end
Line 185: Line 116:
T[id] = msg
T[id] = msg
end
end
    -- Compatibility of legacy data using 'HMS' or 'HM', where 'M' is ambiguous
    T.YMDhms = T.YMDhms or T.YMDHMS
    T.YMDhm  = T.YMDhm  or T.YMDHM
    datecode = datecode == 'YMDHMS' and 'YMDhms' or datecode == 'YMDHM' and 'YMDhm' or datecode
local dFormat = T[datecode]
local dFormat = T[datecode]
if dFormat=='default' and (datecode=='YMDHMS' or datecode=='YMDHM')  then  
if dFormat == 'default' and (datecode == 'YMDhms' or datecode == 'YMDhm')  then  
-- for most languages adding hour:minute:second is done by adding ", HH:MM:SS to the  
-- For most languages adding hour:minute:second is done by adding ", HH:ii:ss to the  
-- day precission date, those languages are skipped in DateI18n.tab and default to  
-- day precission date, those languages are skipped in DateI18n.tab and default to  
-- English which stores word "default"
-- English which stores word "default"
dFormat = parseFormat(T['YMD'], day).. ', H:i'
dFormat = parseFormat(T['YMD'], day).. ', H:i'
if datecode=='YMDHMS' then
if datecode == 'YMDhms' then
dFormat = dFormat .. ':s'
dFormat = dFormat .. ':s'
end
end
Line 209: Line 145:
--  * dFormat : input to {{#time}} function
--  * dFormat : input to {{#time}} function
local function MonthCase(month, case, lang)
local function MonthCase(month, case, lang)
if month == nil or case == nil then
return nil
end
local T = {{},{},{},{},{},{},{},{},{},{},{},{}}
local T = {{},{},{},{},{},{},{},{},{},{},{},{}}
local tab = mw.ext.data.get('I18n/MonthCases.tab', lang)
local tab = mw.ext.data.get('I18n/MonthCases.tab', lang)
Line 222: Line 161:
-- ==================================================
-- ==================================================
local p = {}
local p = {}
-- ===========================================================================
-- === Functions accesible from the outside to allow unit-testing
-- === Please do not use directly as they could change in the future
-- ===========================================================================
---------------------------------------------------------------------------------------
-- Single string replacement that ignores part of the string in "..."
function p.strReplace(String, old, new)
if String:find('"') then
local T={}
for i, str in ipairs(mw.text.split( String, '"', true )) do
if i%2==1 then
str = str:gsub(old, new, 1)
end
table.insert(T, str)
end
return table.concat(T,'"')
else
return String:gsub(old, new, 1)
end
end
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datevec - Array of {year,month,day,hour,minute,second, tzhour, tzmin} containing broken
--    down date-time component strings or numbers
-- OUTPUT:
--  * datenum - same array but holding only numbers or nuls
function p.clean_datevec(datevec)
-- create datecode based on which variables are provided and check for out-of-bound values
-- check special case of month provided as a name
local month = datevec[2]
if type(month) == 'string' and month ~= '' and not tonumber(month) then
-- When the month is not a number, check if it's a month name in the project's language.
datevec[2] = mw.getContentLanguage():formatDate('n', month)
end
-- check bounds
local maxval = {  1/0, 12, 31, 23, 59, 59,  23, 59 } -- max values (or  1/0=+inf) for year, month, day, hour, minute, second, tzhour, tzmin
local minval = { -1/0, 01, 01, 00, 00, 00, -23, 00 } -- min values (or -1/0=-inf) for year, month, ...
local datenum  = {} -- date-time encoded as a vector = [year, month, ... , second, tzhour, tzmin]
for i = 1, 8 do
        local val = tonumber(datevec[i])
        if val and val >= minval[i] and val <= maxval[i] then -- These tests work with infinite min/max values.
    datenum[i] = val
end
end
-- leap second
if tonumber(datevec[6]) == 60 then -- leap second '60' is valid only at end of 23:59 UTC, on 30 June or 31 December of specific years
-- datenum[6] = 60
local MDhm = table.concat({unpack(datenum,2,5)}, ',')
    if (MDhm == table.concat({6, 30, 23, 59}, ',')) or (MDhm == table.concat({12, 31, 23, 59}, ',')) then
  datenum[6] = 60
    end
end
return datenum
end
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datenum - Array of {year,month,day,hour,minute,second, tzhour, tzmin} as numbers or nuls
-- OUTPUT:
--  * timeStamp - date string in the format taken by mw.language:formatDate lua function and {{#time}} parser function
--      https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate
--      https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time
--  * datecode - a code specifying content of the array where Y' is year, 'M' is month,
--    'D' is day, 'h' is hour, 'm' minute, 's' is second.
--    Output has to be one of YMDhms, YMDhm, YMD, YM, Y, MDhms, MDhm, MD, M.
function p.getTimestamp(datenum)
-- create datecode based on datenum
local codes  = { 'Y', 'M', 'D', 'h', 'm', 's'}
local datecode = '' -- a string signifying which combination of variables was provided
for i, c in ipairs(codes) do
datecode = datecode .. (datenum[i] and c or '') -- if datenum[i] than append codes[i] to datecode
end
-- create timestamp string (for example 2000-02-20 02:20:20) based on which variables were provided
local timeStamp
    -- date starting by a year
if datecode == 'YMDhms' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5], datenum[6] )
elseif datecode == 'YMDhm' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5] )
elseif datecode:sub(1,3)=='YMD' then
timeStamp = string.format('%04i-%02i-%02i', datenum[1], datenum[2], datenum[3] )
datecode  = 'YMD' -- 'YMDhms', 'YMDhm' and 'YMD' are the only supported format starting with 'YMD'; all others will be converted to 'YMD'.
elseif datecode:sub(1,2) == 'YM' then
timeStamp = string.format('%04i-%02i', datenum[1], datenum[2] )
datecode  = 'YM'
elseif datecode:sub(1,1)=='Y' then
timeStamp = string.format('%04i', datenum[1] )
datecode  = 'Y'
    -- date starting by a month (the implied year is 2000)
elseif datecode== 'MDhms' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', 2000, datenum[2], datenum[3], datenum[4], datenum[5], datenum[6] )
elseif datecode == 'MDhm' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i', 2000, datenum[2], datenum[3], datenum[4], datenum[5] )
elseif datecode:sub(1,2) == 'MD' then
timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], datenum[3] )
datecode = 'MD' -- 'MDhms', 'MDhm' and 'MD' are the only supported format starting with 'MD'; all others will be converted to 'MD'
elseif datecode:sub(1,1) == 'M' then -- Ambiguous: could mean minutes, but here means month (when parsed as a name/abbrev, not as a number).
timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], 1 )
datecode  = 'M'
    -- other possible but unrecognized formats (e.g. 'DHis', 'DHi', 'D', 'His', 'Hi');
    -- note that 'Dh', 'D', 'h', 's' may eventually work, but not 'm' for minute only, which is ambiguous with 'M' for month only.
else
timeStamp = nil -- format not supported
end
return timeStamp, datecode
end
local function isValidLangCode(lang)
if not lang then
return false
end
lang = mw.text.trim(lang)
return lang ~= '' and lang ~= '⧼Lang⧽' and mw.language.isValidCode(lang)
end
-- ===========================================================================
-- === Version of the function to be called from other LUA codes
-- ===========================================================================


--[[ ========================================================================================
--[[ ========================================================================================
Line 239: Line 307:
   * class: CSS class for the <time> node, use "" for no metadata at all
   * class: CSS class for the <time> node, use "" for no metadata at all
]]
]]
function p._Date(datevec, lang, case, class, trim_year)
function p._Date(datevec, lang, case, class, trim_year)
-- make sure inputs are in the right format
-- make sure inputs are in the right format
if not lang or not mw.language.isValidCode( lang ) then
lang = mw.getCurrentFrame():callParserFunction( "int", "lang" ) -- get user's chosen language
-- set language
if not isValidLangCode(lang) then
-- get user's chosen language
-- equivalent to {{int:lang}}
lang = mw.getCurrentFrame():callParserFunction("int", "lang")
if not isValidLangCode(lang) then
-- if that doesn't work, use the project language
-- this is useful on projects which import this module from Commons
lang = mw.language.getContentLanguage().code
if not isValidLangCode(lang) then
-- if that doesn't work, use English
lang = "en"
end
end
end
end
if lang == 'be-tarsk' then
if lang == 'be-tarask' then
lang = 'be-x-old'
lang = 'be-x-old'
end
end
-- process datevec and extract timeStamp and datecode strings as well as numeric datenum array
-- process datevec and extract timeStamp and datecode strings as well as numeric datenum array
local datecode,  datenum  = parserDatevec(datevec)
local datenum  = p.clean_datevec(datevec)
local year, month, day = datenum[1], datenum[2], datenum[3]
local year, month, day = datenum[1], datenum[2], datenum[3]
local timeStamp, datecode = getTimestamp(datecode, datenum)
local timeStamp, datecode = p.getTimestamp(datenum)
if not timeStamp then -- something went wrong in parserDatevec
if not timeStamp then -- something went wrong in parserDatevec
return ''
return ''
Line 265: Line 348:
-- phrases as it is done in [[c:Module:Complex date]]
-- phrases as it is done in [[c:Module:Complex date]]
case = case or ''
case = case or ''
if (lang=='qu' or lang=='qug') and (case=='nom') then
if (lang=='qu' or lang=='qug') and case=='nom' then
-- Special case related to Quechua and Kichwa languages. The form in the I18n is
-- Special case related to Quechua and Kichwa languages. The form in the I18n is
--  Genitive case with suffix "pi" added to month names provided by {#time}}
--  Genitive case with suffix "pi" added to month names provided by {#time}}
Line 271: Line 354:
-- see https://commons.wikimedia.org/wiki/Template_talk:Date#Quechua from 2014
-- see https://commons.wikimedia.org/wiki/Template_talk:Date#Quechua from 2014
dFormat = dFormat:gsub('F"pi"', 'F')
dFormat = dFormat:gsub('F"pi"', 'F')
elseif (case=='gen') then
elseif case == 'gen' then
dFormat = strReplace(dFormat, "F", "xg")
dFormat = p.strReplace(dFormat, "F", "xg")
elseif (case=='nom') then
elseif case == 'nom' then
dFormat = strReplace(dFormat, "xg", "F")
dFormat = p.strReplace(dFormat, "xg", "F")
elseif (case ~= '') then
elseif case ~= '' and month ~= nil then
-- see is page [[Data:I18n/MonthCases.tab]] on Commons have name of the month  
-- see is page [[Data:I18n/MonthCases.tab]] on Commons have name of the month  
-- in specific gramatic case in desired language. If we have it than replace  
-- in specific gramatic case in desired language. If we have it than replace  
Line 281: Line 364:
local monthMsg = MonthCase(month, case, lang)
local monthMsg = MonthCase(month, case, lang)
if  monthMsg and monthMsg ~= '' then -- make sure it exists
if  monthMsg and monthMsg ~= '' then -- make sure it exists
dFormat = strReplace(dFormat, 'F',  '"'..monthMsg..'"') -- replace default month with month name we already looked up
dFormat = p.strReplace(dFormat, 'F',  '"'..monthMsg..'"') -- replace default month with month name we already looked up
dFormat = strReplace(dFormat, 'xg', '"'..monthMsg..'"')
dFormat = p.strReplace(dFormat, 'xg', '"'..monthMsg..'"')
end
end
end
end


     -- Translate the date using specified format
     -- Translate the date using specified format.
-- See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate and  
-- See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate and  
-- https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions##time for explanation of the format
-- https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions##time for explanation of the format
local datestr = mw.language.new(lang):formatDate( dFormat, timeStamp) -- same as using {{#time}} parser function
local langObj = mw.language.new(lang)
local datestr = langObj:formatDate(dFormat, timeStamp) -- same as using {{#time}} parser function
-- Special case related to Thai solar calendar: prior to 1940 new-year was at different  
-- Special case related to Thai solar calendar: prior to 1940 new-year was at different time of year,
-- time of year, so just year (datecode=='Y') is ambiguous and is replaced by "YYYY or YYYY" phrase
-- so just year (datecode == 'Y') is ambiguous and is replaced by "YYYY or YYYY" phrase
if lang=='th' and datecode=='Y' and year<=1940 then
if lang=='th' and datecode=='Y' and year<=1940 then
datestr = string.format('%04i หรือ %04i', year+542, year+543 )  
datestr = string.format('%04i หรือ %04i', year+542, year+543 )  
end
end
-- If year<1000 than either keep the date padded to the length of 4 digits or trim it
-- If year < 1000 than either keep the date padded to the length of 4 digits or trim it.
-- decide if the year will stay padded with zeros (for years in 0-999 range)
-- Decide if the year will stay padded with zeros (for years in 0-999 range).
if year and year<1000 then
if year and year < 1000 then
if type(trim_year)=='nil' then
trim_year = yesno(trim_year, trim_year or '100-999')
trim_year = '100-999'
if type(trim_year) == 'string' then
end
-- If `trim_year` not a simple boolean, then it's a range of dates.
local trim = yesno(trim_year,nil) -- convert to boolean
-- For example '100-999' means to pad 1-or-2-digit years to be 4-digit long, while keeping 3-digit years as is.
if trim==nil and type(trim_year)=='string' then
-- if "trim_year" not a simple True/False than it is range of dates
-- for example '100-999' means to pad one and 2 digit years to be 4 digit long, while keeping 3 digit years as is
local YMin, YMax = trim_year:match( '(%d+)-(%d+)' )
local YMin, YMax = trim_year:match( '(%d+)-(%d+)' )
trim = (YMin~=nil and year>=tonumber(YMin) and year<=tonumber(YMax))  
trim_year = YMin and year >= tonumber(YMin) and year <= tonumber(YMax)
end
end
if trim==true then
if trim_year then
datestr = trimYear(datestr, year, lang) -- in datestr replace long year with trimmed one
datestr = trimYear(datestr, year, lang) -- in datestr replace long year with trimmed one
end
end
end
end


-- append timezone if present
-- Append a timezone if present (after the hour and minute of the day).
if datenum[7] and (datecode == 'YMDHMS' or datecode == 'YMDHM') then
if datenum[7] and (datecode:sub(1, 5) == 'YMDhm' or datecode:sub(1, 4) == 'MDhm') then
-- use {{#time}} parser function to create timezone string, so that we use correct character set
-- Use {{#time}} parser function to create timezone string, so that we use the correct character set.
local sign = (datenum[7]<0) and '−' or '+'
local sign = (datenum[7]<0) and '−' or '+'
timeStamp = string.format("2000-01-01 %02i:%02i:00", math.abs(datenum[7]), datenum[8] or 0)
timeStamp = string.format("2000-01-01 %02i:%02i:00", math.abs(datenum[7]), datenum[8] or 0)
local timezone = mw.language.new(lang):formatDate( 'H:i', timeStamp) -- same as using {{#time}} parser function
local timezone = langObj:formatDate('H:i', timeStamp) -- same as using {{#time}} parser function
datestr = string.format("%s %s%s", datestr, sign, timezone )
datestr = string.format("%s %s%s", datestr, sign, timezone )
end
end


-- html formating and tagging of date string
-- HTML formating of date string and tagging for microformats (only for absolute dates with a year).
if class and class ~= '' and datecode~='M' and datecode~='MD'then
if class and class ~= '' and class ~= '-' and datecode:sub(1,1) == 'Y' then  
local DateHtmlTags = '<span style="white-space:nowrap"><time class="%s" datetime="%s">%s</time></span>'
local pat = '<time class="%s" datetime="%s" lang="%s" dir="%s" style="white-space:nowrap">%s</time>'
datestr = DateHtmlTags:format(class, timeStamp, datestr)
datestr = pat:format(class, timeStamp, lang, langObj:getDir(), datestr)
end
end
return datestr
return datestr
end
end
-- ===========================================================================
-- === Version of the function to be called from template namespace
-- ===========================================================================


--[[ ========================================================================================
--[[ ========================================================================================
Line 348: Line 433:
]]
]]
function p.Date(frame)
function p.Date(frame)
-- get args
local args = {}
local args = {}
for name, value in pairs( frame.args ) do  
for key, value in pairs(frame.args) do  
name = string.gsub( string.lower(name), ' ', '_')
local trimmed_key = string.gsub(string.lower(mw.text.trim(key)), ' ', '_')
args[name] = value
local trimmed_value = mw.text.trim(value)
if trimmed_key ~= 'class' and trimmed_value == '' then
trimmed_value = nil
end
args[trimmed_key] = trimmed_value
end
-- default values
-- Allows to set the html class of the time node where the date is included. This is useful for microformats.
args.class = args.class or '-'
if args.class == '' then
args.class = 'dtstart'
end
end
-- By default, pad one- and two-digit years to be 4 digits long, while keeping three-digit years as-is.
args.trim_year = args.trim_year or '100-999'
return p._Date(
return p._Date(
{ args.year, args.month, args.day, args.hour, args.minute, args.second, args.tzhour, args.tzmin },
{args.year, args.month, args.day, args.hour, args.minute, args.second, args.tzhour, args.tzmin},
args.lang,                 -- language
args.lang,
args.case,                 -- allows to specify grammatical case for the month for languages that use them
args.case,
args.class or 'dtstart',   -- allows to set the html class of the time node where the date is included. This is useful for microformats.
args.class,
args.trim_year or '100-999' -- by default pad one and 2 digit years to be 4 digit long, while keeping 3 digit years as is
args.trim_year
)
)
end
end


return p
return p

Revision as of 23:40, 24 December 2023

Documentation for this module may be created at Module:DateI18n/doc

--[[  
  __  __           _       _        ____        _       ___ _  ___        
 |  \/  | ___   __| |_   _| | ___ _|  _ \  __ _| |_ ___|_ _/ |( _ ) _ __  
 | |\/| |/ _ \ / _` | | | | |/ _ (_) | | |/ _` | __/ _ \| || |/ _ \| '_ \ 
 | |  | | (_) | (_| | |_| | |  __/_| |_| | (_| | ||  __/| || | (_) | | | |
 |_|  |_|\___/ \__,_|\__,_|_|\___(_)____/ \__,_|\__\___|___|_|\___/|_| |_|
  
This module is intended for processing of date strings.

Please do not modify this code without applying the changes first at Module:DateI18n/sandbox
and testing at Module:DateI18n/sandbox/testcases and Module talk:DateI18n/sandbox/testcases.

Authors and maintainers:
* User:Parent5446 - original version of the function mimicking Template:ISOdate
* User:Jarekt - original version of the functions mimicking Template:Date 
]]

-- =======================================
-- === Dependencies ======================
-- =======================================

require('strict')

-- =======================================
-- === Local Functions ===================
-- =======================================

------------------------------------------------------------------------------
--[[ (copied from Module:Core)
Interpret a boolean-like wikitext value in a uniform way.
Inputs:
  1) val - value to interpret:
		true  : true  (boolean), 1 (number), or one of the case-insensitive strings
		        "yes", "y", "true", "t", "1", "on"
		false : false (boolean), 0 (number), or one of the case-insensitive strings
		        "no", "n", "false", "f", "0", "off"
  2) default - value returned for every other input (nil, other strings or numbers, ...)
See Also: Module:Yesno, which works similarly
]]
local function yesno(val, default)
	local kind = type(val)
	if kind == 'boolean' then
		return val
	end
	if kind == 'number' then
		-- numbers are matched through their string form, so 1 -> "1" and 0 -> "0"
		val = tostring(val)
	elseif kind ~= 'string' then
		return default
	end
	local key = mw.ustring.lower(val) -- comparison is case-insensitive
	if key == 'yes' or key == 'y' or key == 'true' or key == 't' or key == '1' or key == 'on' then
		return true
	elseif key == 'no' or key == 'n' or key == 'false' or key == 'f' or key == '0' or key == 'off' then
		return false
	end
	return default
end


---------------------------------------------------------------------------------------
-- Trim leading zeros in years prior to year 1000.
-- INPUT:
--  * datestr   - translated date string, containing the year zero-padded to 4 digits
--  * year      - year number (the zero-stripping logic assumes a year below 1000)
--  * lang      - language of translation
-- OUTPUT:
--  * datestr - updated date string (exactly one value: the substitution count
--              produced by string.gsub is not passed on to the caller)

local function trimYear(datestr, year, lang)
	local padded = string.format('%04i', year) -- 4 digit year in standard form "0123"
	local localized = mw.language.new(lang):formatDate('Y', padded) -- same as calling {{#time}} parser function
	local trimmed
	if padded == localized then
		-- most languages use the standard form of the year
		trimmed = tostring(year)
	else
		-- some languages use different characters for digits
		trimmed = localized
		local zero = mw.ustring.sub(localized, 1, 1) -- "0" in whatever script the language uses
		for i = 1, 3 do -- strip at most three leading zeros
			if mw.ustring.sub(trimmed, 1, 1) ~= zero then
				break
			end
			trimmed = mw.ustring.sub(trimmed, 2, 5 - i)
		end
	end
	-- The localized year is meant to be matched literally: escape any pattern-magic
	-- punctuation in it, and protect '%' in the replacement text.
	local pattern = localized:gsub('%p', '%%%0')
	local replacement = trimmed:gsub('%%', '%%%%')
	-- The parentheses truncate gsub's (string, count) result to the string only.
	return (string.gsub(datestr, pattern, replacement))
end

---------------------------------------------------------------------------------------
-- Look up proper format string to be passed to {{#time}} parser function
-- INPUTS:
--  * datecode: YMDhms, YMDhm, YMD, YM, Y, MDhms, MDhm, MD, or M
--              (the legacy codes YMDHMS and YMDHM are mapped to YMDhms and YMDhm)
--  * day     : Number between 1 and 31 (not needed for most languages); when nil,
--              a day-dependent format falls back to its "default" entry
--  * lang    : language
-- OUTPUT:
--  * dFormat : input to {{#time}} function
-- NOTE(review): if 'DateI18n.tab' has no row for `datecode`, the lookup yields nil and
-- the format parsing below raises an error; confirm the table covers every datecode.
local function getDateFormat(datecode, day, lang)
	-- Convert one raw message of the data table into a {{#time}} format string.
	local function parseFormat(dFormat, dayNum)
		if dFormat:find('default') and #dFormat>10 then
			-- Special (and messy) case of dFormat code depending on a day number, where data is a
			-- JSON-encoded table {”default”:”*”,”dDD”:”*”} including fields for specific 2-digit days.
			-- Change curly double quotes (possibly used for easier editing in tabular data) in dFormat
			-- to straight ASCII double quotes (required for parsing of this JSON-encoded table).
			-- gsub returns (string, count): keep only the string, so that the count is not
			-- handed to jsonDecode as its second (flags) argument.
			local json = mw.ustring.gsub(dFormat, '[„“‟”]', '"')
			local D = mw.text.jsonDecode(json)
			-- If the desired day is unknown or not in that JSON table, then use its "default" case.
			local byDay = dayNum and D[string.format('d%02i', dayNum)]
			dFormat = byDay or D.default
			-- Apostrophes needed in plain-text must not use ASCII single quotes but curly apostrophe
			-- e.g. { ‟default”: ‟j”, ‟d01”: ‟j’'o'” }, not { ‟default”: ‟j”, ‟d01”: ‟j''o'” }.
		end
		-- Change ASCII single quotes to ASCII double quotes used for {{#time}} marking.
		return (dFormat:gsub("'", '"'))
	end

	-- convert the rows {id, <unused>, message} of the data table into a dictionary id -> message
	local T = {}
	local tab = mw.ext.data.get('DateI18n.tab', lang)
	for _, row in pairs(tab.data) do
		T[row[1]] = row[3]
	end
	-- Compatibility of legacy data using 'HMS' or 'HM', where 'M' is ambiguous
	T.YMDhms = T.YMDhms or T.YMDHMS
	T.YMDhm  = T.YMDhm  or T.YMDHM
	datecode = datecode == 'YMDHMS' and 'YMDhms' or datecode == 'YMDHM' and 'YMDhm' or datecode

	local dFormat = T[datecode]
	if dFormat == 'default' and (datecode == 'YMDhms' or datecode == 'YMDhm') then
		-- For most languages adding hour:minute:second is done by adding ", H:i:s" to the
		-- day precision date, those languages are skipped in DateI18n.tab and default to
		-- English which stores word "default"
		dFormat = parseFormat(T['YMD'], day) .. ', H:i'
		if datecode == 'YMDhms' then
			dFormat = dFormat .. ':s'
		end
	else
		dFormat = parseFormat(dFormat, day)
	end
	return dFormat
end

---------------------------------------------------------------------------------------
-- Look up the name of a month in a specific grammatical case
-- INPUTS:
--  * month : month number
--  * case  : grammatical case abbreviation, like "ins", "loc"
--  * lang  : language
-- OUTPUT:
--  * message stored in the 'I18n/MonthCases.tab' data table for that month and case,
--    or nil when there is no such row (or when month or case is nil)
local function MonthCase(month, case, lang)
	if month == nil or case == nil then
		return nil
	end
	local found
	local tab = mw.ext.data.get('I18n/MonthCases.tab', lang)
	for _, row in pairs(tab.data) do
		-- Each row is {month, case, message}. Comparing instead of indexing a fixed
		-- 12-slot table means an unexpected month value (in the data or in the
		-- argument) simply does not match instead of raising an error.
		if row[1] == month and row[2] == case then
			found = row[3] -- keep scanning: a later duplicate row overrides an earlier one
		end
	end
	return found
end

-- ==================================================
-- === External functions ===========================
-- ==================================================
local p = {}

-- ===========================================================================
-- === Functions accessible from the outside to allow unit-testing
-- === Please do not use directly as they could change in the future
-- ===========================================================================

---------------------------------------------------------------------------------------
-- String replacement that ignores the parts of the string enclosed in "..."
-- INPUTS:
--  * String : text to process
--  * old    : what to look for (handed to string.gsub as its pattern)
--  * new    : replacement (handed to string.gsub as its replacement)
-- OUTPUT:
--  * the text in which the first match of `old` in every stretch outside of the
--    quotes was replaced; always exactly one value, whichever branch is taken
function p.strReplace(String, old, new)
	if not String:find('"') then
		-- parentheses drop gsub's substitution count, matching the quoted branch below
		return (String:gsub(old, new, 1))
	end
	local T = {}
	for i, str in ipairs(mw.text.split( String, '"', true )) do
		if i%2==1 then -- odd pieces lie outside of the "..." parts
			str = str:gsub(old, new, 1)
		end
		T[i] = str
	end
	return table.concat(T,'"')
end

---------------------------------------------------------------------------------------
-- Convert raw date-time components to numbers and drop the values which are out of bounds
-- INPUT:
--  * datevec - Array of {year,month,day,hour,minute,second, tzhour, tzmin} containing broken 
--    down date-time component strings or numbers. The array itself is not modified.
-- OUTPUT:
--  * datenum - same array but holding only numbers or nils
function p.clean_datevec(datevec)
	-- check special case of month provided as a name
	local month = datevec[2]
	if type(month) == 'string' and month ~= '' and not tonumber(month) then
		-- When the month is not a number, check if it's a month name in the project's language.
		-- The result is kept in a local so that the caller's table is left untouched.
		month = mw.getContentLanguage():formatDate('n', month)
	end

	-- check bounds
	local maxval = {  1/0, 12, 31, 23, 59, 59,  23, 59 } -- max values (or  1/0=+inf) for year, month, day, hour, minute, second, tzhour, tzmin
	local minval = { -1/0, 01, 01, 00, 00, 00, -23, 00 } -- min values (or -1/0=-inf) for year, month, ...
	local datenum  = {} -- date-time encoded as a vector = [year, month, ... , second, tzhour, tzmin]
	for i = 1, 8 do
		local raw = datevec[i]
		if i == 2 then
			raw = month -- possibly converted from a month name above
		end
		local val = tonumber(raw)
		if val and val >= minval[i] and val <= maxval[i] then -- These tests work with infinite min/max values.
			datenum[i] = val
		end
	end

	-- leap second
	-- '60' is valid only at end of 23:59 UTC, on 30 June or 31 December of specific years.
	-- The components are compared one by one: any of them may be nil here, and joining 
	-- them with table.concat would raise an error on such a hole.
	if tonumber(datevec[6]) == 60 then
		local mth, day, hour, min = datenum[2], datenum[3], datenum[4], datenum[5]
		local lastMinute = (hour == 23 and min == 59)
		local lastDay    = (mth == 6 and day == 30) or (mth == 12 and day == 31)
		if lastMinute and lastDay then
			datenum[6] = 60
		end
	end

	return datenum
end
	
---------------------------------------------------------------------------------------
-- Build a timestamp string and a datecode out of a numeric date-time array
-- INPUT:
--  * datenum - Array of {year,month,day,hour,minute,second, tzhour, tzmin} as numbers or nils
--    (only the first six entries are read here)
-- OUTPUT:
--  * timeStamp - date string in the format taken by mw.language:formatDate lua function and {{#time}} parser function
--       https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate
--       https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time
--     or nil when the combination of provided components is not supported
--  * datecode - a code specifying content of the array where 'Y' is year, 'M' is month,
--     'D' is day, 'h' is hour, 'm' minute, 's' is second.
--     For supported combinations it is one of YMDhms, YMDhm, YMD, YM, Y, MDhms, MDhm, MD, M;
--     otherwise the letters of the components which were found are returned unchanged.
function p.getTimestamp(datenum)
	local year, month, day      = datenum[1], datenum[2], datenum[3]
	local hour, minute, second  = datenum[4], datenum[5], datenum[6]

	-- one letter for each component which is present, always in the same order
	local datecode = (year and 'Y' or '') .. (month  and 'M' or '') .. (day    and 'D' or '')
		.. (hour and 'h' or '') .. (minute and 'm' or '') .. (second and 's' or '')

	-- building blocks of the timestamp (for example 2000-02-20 02:20:20)
	local fmtDate = '%04i-%02i-%02i'
	local fmtHM   = ' %02i:%02i'
	local fmtHMS  = ' %02i:%02i:%02i'

	local first = datecode:sub(1,1)
	if first == 'Y' then
		-- date starting by a year
		if datecode == 'YMDhms' then
			return (fmtDate .. fmtHMS):format(year, month, day, hour, minute, second), datecode
		end
		if datecode == 'YMDhm' then
			return (fmtDate .. fmtHM):format(year, month, day, hour, minute), datecode
		end
		-- 'YMDhms', 'YMDhm' and 'YMD' are the only supported codes starting with 'YMD';
		-- any other code is cut down to its longest supported prefix.
		if datecode:sub(1,3) == 'YMD' then
			return fmtDate:format(year, month, day), 'YMD'
		end
		if datecode:sub(1,2) == 'YM' then
			return string.format('%04i-%02i', year, month), 'YM'
		end
		return string.format('%04i', year), 'Y'
	end

	if first == 'M' then
		-- date starting by a month (the implied year is 2000)
		-- Upper case 'M' always means month here; minutes use lower case 'm'.
		if datecode == 'MDhms' then
			return (fmtDate .. fmtHMS):format(2000, month, day, hour, minute, second), datecode
		end
		if datecode == 'MDhm' then
			return (fmtDate .. fmtHM):format(2000, month, day, hour, minute), datecode
		end
		-- 'MDhms', 'MDhm' and 'MD' are the only supported codes starting with 'MD'
		if datecode:sub(1,2) == 'MD' then
			return fmtDate:format(2000, month, day), 'MD'
		end
		return fmtDate:format(2000, month, 1), 'M'
	end

	-- other possible but unrecognized combinations (starting with day, hour, minute or second,
	-- or nothing at all): format not supported
	return nil, datecode
end

-- Check if "lang" can be used as a language code.
-- Returns false for nil/false, for a string which is empty after trimming and for the 
-- literal '⧼Lang⧽'; any other value is judged by mw.language.isValidCode.
local function isValidLangCode(lang)
	if lang then
		local code = mw.text.trim(lang)
		if code == '' or code == '⧼Lang⧽' then
			return false
		end
		return (mw.language.isValidCode(code))
	end
	return false
end

-- ===========================================================================
-- === Version of the function to be called from other Lua code
-- ===========================================================================

--[[ ========================================================================================
Date
 
This function is the core part of the ISOdate template. 
 
Usage:
  local Date = require('Module:DateI18n')._Date
  local dateStr = Date({2020, 12, 30, 12, 20, 11}, lang)
 
Parameters:
  * {year,month,day,hour,minute,second, tzhour, tzmin}: broken down date-time component strings or numbers
		tzhour, tzmin are timezone offsets from UTC, hours and minutes
  * lang: The language to display it in. When it is not a valid language code, the user's 
		interface language, then the project content language and finally "en" are tried.
  * case: Language format (genitive, etc.) for some languages
  * class: CSS class for the <time> node, use "" (or "-" or nil) for no metadata at all
  * trim_year: only used for years below 1000. Either a boolean-like value (as understood by 
		yesno) or a range string like "100-999"; the year is passed to trimYear when the 
		value is true or when the year lies within the range. Defaults to "100-999".

Returns:
  The formatted date string, wrapped in a <time> element when a class is given and the date 
  contains a year; an empty string when the provided components do not form a supported date.
]]
function p._Date(datevec, lang, case, class, trim_year)
	-- make sure inputs are in the right format
	
	-- set language
	if not isValidLangCode(lang) then
		-- get user's chosen language
		-- equivalent to {{int:lang}}
		lang = mw.getCurrentFrame():callParserFunction("int", "lang")
		
		if not isValidLangCode(lang) then
			-- if that doesn't work, use the project language
			-- this is useful on projects which import this module from Commons
			lang = mw.language.getContentLanguage():getCode()
			
			if not isValidLangCode(lang) then
				-- if that doesn't work, use English
				lang = "en"
			end
		end
	end
	if lang == 'be-tarask' then
		lang = 'be-x-old'
	end
	
	-- process datevec and extract timeStamp and datecode strings as well as numeric datenum array
	local datenum  = p.clean_datevec(datevec)
	local year, month, day = datenum[1], datenum[2], datenum[3]
	local timeStamp, datecode = p.getTimestamp(datenum)
	if not timeStamp then -- unsupported combination of date-time components
		return ''
	end
	-- Commons [[Data:DateI18n.tab]] page stores preferred formats for different 
	-- languages and datecodes (specifying year-month-day or just year or month-day, etc)
	-- Look up country specific format input to {{#time}} function
	local dFormat = getDateFormat(datecode, day, lang)

	-- By default the grammatical case is not specified (case=='') allowing the format to be specified 
	-- in [[Data:DateI18n.tab]]. You can overwrite the default grammatical case of the month by 
	-- specifying "case" variable. This is needed mostly by Slavic languages to create more complex 
	-- phrases as it is done in [[c:Module:Complex date]]
	case = case or ''
	if (lang=='qu' or lang=='qug') and case=='nom' then
		-- Special case related to Quechua and Kichwa languages. The form in the I18n is
		-- Genitive case with suffix "pi" added to month names provided by {{#time}};
		-- in Nominative case that "pi" should be removed
		-- see https://commons.wikimedia.org/wiki/Template_talk:Date#Quechua from 2014
		dFormat = dFormat:gsub('F"pi"', 'F')
	elseif case == 'gen' then
		dFormat = p.strReplace(dFormat, "F", "xg")
	elseif case == 'nom' then
		dFormat = p.strReplace(dFormat, "xg", "F")
	elseif case ~= '' and month ~= nil then
		-- see if page [[Data:I18n/MonthCases.tab]] on Commons has the name of the month 
		-- in specific grammatical case in desired language. If we have it then replace 
		-- "F" and "xg" in dFormat
		local monthMsg = MonthCase(month, case, lang)
		if  monthMsg and monthMsg ~= '' then -- make sure it exists
			dFormat = p.strReplace(dFormat, 'F',  '"'..monthMsg..'"') -- replace default month with month name we already looked up
			dFormat = p.strReplace(dFormat, 'xg', '"'..monthMsg..'"')
		end
	end

	-- Translate the date using specified format.
	-- See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate and 
	-- https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions##time for explanation of the format
	local langObj = mw.language.new(lang)
	local datestr = langObj:formatDate(dFormat, timeStamp) -- same as using {{#time}} parser function
	
	-- Special case related to Thai solar calendar: prior to 1940 new-year was at different time of year,
	-- so just year (datecode == 'Y') is ambiguous and is replaced by "YYYY or YYYY" phrase
	if lang=='th' and datecode=='Y' and year<=1940 then
		datestr = string.format('%04i หรือ %04i', year+542, year+543 ) 
	end
	
	-- If year < 1000 then either keep the date padded to the length of 4 digits or trim it.
	-- Decide if the year will stay padded with zeros (for years in 0-999 range).
	if year and year < 1000 then
		trim_year = yesno(trim_year, trim_year or '100-999')
		if type(trim_year) == 'string' then
			-- If `trim_year` not a simple boolean, then it's a range of dates.
			-- For example '100-999' means to pad 1-or-2-digit years to be 4-digit long, while keeping 3-digit years as is.
			local YMin, YMax = trim_year:match( '(%d+)-(%d+)' )
			trim_year = YMin and year >= tonumber(YMin) and year <= tonumber(YMax)
		end
		if trim_year then
			datestr = trimYear(datestr, year, lang) -- in datestr replace long year with trimmed one
		end
	end

	-- Append a timezone if present (after the hour and minute of the day).
	if datenum[7] and (datecode:sub(1, 5) == 'YMDhm' or datecode:sub(1, 4) == 'MDhm') then
		-- Use {{#time}} parser function to create timezone string, so that we use the correct character set.
		local sign = (datenum[7]<0) and '−' or '+'
		-- The offset is formatted through a dummy timestamp of its own. It must not be stored 
		-- in `timeStamp`, which still has to provide the "datetime" attribute below.
		local tzStamp = string.format("2000-01-01 %02i:%02i:00", math.abs(datenum[7]), datenum[8] or 0)
		local timezone = langObj:formatDate('H:i', tzStamp) -- same as using {{#time}} parser function
		datestr = string.format("%s %s%s", datestr, sign, timezone )
	end

	-- HTML formatting of date string and tagging for microformats (only for absolute dates with a year).
	if class and class ~= '' and class ~= '-' and datecode:sub(1,1) == 'Y' then 
		local pat = '<time class="%s" datetime="%s" lang="%s" dir="%s" style="white-space:nowrap">%s</time>'
		datestr = pat:format(class, timeStamp, lang, langObj:getDir(), datestr)
	end
	return datestr
end

-- ===========================================================================
-- === Version of the function to be called from template namespace
-- ===========================================================================

--[[ ========================================================================================
Date
 
Template entry point of the ISOdate template: normalizes the #invoke arguments and hands 
them over to p._Date.
 
Usage:
{{#invoke:DateI18n|Date|year=|month=|day=|hour=|minute=|second=|tzhour=|tzmin=|lang=en}}
 
Parameters:
  * year, month, day, hour, minute, second: broken down date-time component strings
  * tzhour, tzmin: timezone offset from UTC, hours and minutes
  * lang: The language to display it in
  * case: Language format (genitive, etc.) for some languages
  * class: CSS class for the <time> node. When the parameter is left out (or set to "-") 
		no <time> node is produced; an empty value selects the class "dtstart".
  * trim_year: passed on to p._Date, defaults to "100-999"

Parameter names are trimmed, lower-cased and have their spaces replaced by underscores; 
values are trimmed and, except for "class", empty values count as not provided.
]]
function p.Date(frame)
	-- collect the arguments under normalized names
	local args = {}
	for rawKey, rawValue in pairs(frame.args) do 
		local key = string.lower(mw.text.trim(rawKey))
		key = string.gsub(key, ' ', '_')
		local value = mw.text.trim(rawValue)
		if value == '' and key ~= 'class' then
			value = nil -- an empty "class" is meaningful, see below
		end
		args[key] = value
	end
	
	-- html class of the time node where the date is included (useful for microformats):
	-- missing -> '-', empty -> 'dtstart', anything else is used as provided
	local class = args.class
	if class == nil then
		class = '-'
	elseif class == '' then
		class = 'dtstart'
	end

	-- By default, pad one- and two-digit years to be 4 digits long, while keeping three-digit years as-is.
	local trim_year = args.trim_year or '100-999'
	
	local datevec = {args.year, args.month, args.day, args.hour, args.minute, args.second, args.tzhour, args.tzmin}
	return p._Date(datevec, args.lang, args.case, class, trim_year)
end

-- Export the table holding the public functions of the module.
return p