Module:DateI18n: Difference between revisions

From WWII Archives

w>Legoktm
(Replace Module:No globals with require( "strict" ))
 
(use require('strict') instead of require('Module:No globals'))
Line 15: Line 15:
* User:Jarekt - original version of the functions mimicking template:Date  
* User:Jarekt - original version of the functions mimicking template:Date  
]]
]]
-- =======================================
-- === Dependencies ======================
-- =======================================
require('strict')
require('strict')


-- ==================================================
-- =======================================
-- === Internal functions ===========================
-- === Local Functions ===================
-- ==================================================
-- =======================================


-- Function allowing for consistent treatment of boolean-like wikitext input.
------------------------------------------------------------------------------
-- It works similarly to Module:Yesno
--[[ (copied from Module:Core)
Function allowing for consistent treatment of boolean-like wikitext input.
Inputs:
  1) val - value to be evaluated, outputs as a function of values:
true  : true  (boolean), 1 (number), or strings: "yes", "y", "true", "1"
false : false (boolean), 0 (number), or strings: "no", "n", "false", "0"
  2) default - value to return otherwise
See Also: It works similarly to Module:Yesno
]]
local function yesno(val, default)
local function yesno(val, default)
if type(val) == 'boolean' then
if type(val) == 'boolean' then
return val
return val
elseif type(val) == 'number' then
elseif type(val) == 'number' then
if val==1 then  
val = tostring(val)
return true
end
elseif val==0 then
if type(val) == 'string' then
return false
local LUT = {
yes=true , y=true , ['true'] =true , t=true , ['1']=true , on =true,
no =false, n=false, ['false']=false, f=false, ['0']=false, off=false }
    val = LUT[mw.ustring.lower(val)]  -- put in lower case
    if (val~=nil) then
return val
end
end
elseif type(val) == 'string' then
    val = mw.ustring.lower(val)  -- put in lower case
    if val == 'no'  or val == 'n' or val == 'false' or tonumber(val) == 0 then
        return false
    elseif val == 'yes' or val == 'y' or val == 'true'  or tonumber(val) == 1 then
        return true
    end
     end
     end
     return default
     return default
end
end


---------------------------------------------------------------------------------------
-- String replacement that ignores part of the string in "..."
local function strReplace(String, old, new)
if String:find('"') then
local T={}
for i, str in ipairs(mw.text.split( String, '"', true )) do
if i%2==1 then
str = str:gsub(old, new)
end
table.insert(T, str)
end
return table.concat(T,'"')
else
return String:gsub(old, new)
end
end
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datevec - Array of {year,month,day,hour,minute,second, tzhour, tzmin} containing broken
--    down date-time component strings or numbers
-- OUTPUT:
--  * datecode - a code specifying content of the array where Y' is year, 'M' is month,
--    'D' is day, 'H' is hour, 'M' minute, 'S' is second. output has to be one of YMDHMS, YMDHM, YMD, YM, MD, Y
--  * datenum - same array but holding only numbers or nuls
local function parserDatevec(datevec)
-- if month is not a number than check if it is a month name in project's language
local month = datevec[2]
if month and month~='' and not tonumber(month) then
datevec[2] = mw.getContentLanguage():formatDate( "n", month)
end
-- create datecode based on which variables are provided and check for out-of-bound values
local maxval = {nil, 12, 31, 23, 59, 59,  23, 59} -- max values for year, month, ...
local minval = {nil,  1,  1,  0,  0,  0, -23,  0} -- min values for year, month, ...
local c = {'Y', 'M', 'D', 'H', 'M', 'S', '', ''}
local datecode = '' -- a string signifying which combination of variables was provided
local datenum = {}  -- date-time encoded as a vector = [year, month, ... , second]
for i = 1,8 do
datenum[i] = tonumber(datevec[i])
if datenum[i] and (i==1 or (datenum[i]>=minval[i] and datenum[i]<=maxval[i])) then
datecode = datecode .. c[i]
end
end
return datecode, datenum
end
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datecode - a code specifying content of the array where Y' is year, 'M' is month,
--    'D' is day, 'H' is hour, 'M' minute, 'S' is second. output has to be one of YMDHMS, YMDHM, YMD, YM, MD, Y
--  * datenum - Array of {year,month,day,hour,minute,second, tzhour, tzmin} as numbers or nuls
-- OUTPUT:
--  * timeStamp - date string in the format taken by mw.language:formatDate lua function and {{#time}} perser function
--      https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate
--      https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time
--  * datecode - with possible corrections
local function getTimestamp(datecode, datenum)
-- create time stamp string (for example 2000-02-20 02:20:20) based on which variables were provided
local timeStamp
if datecode == 'YMDHMS' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5], datenum[6] )
elseif datecode == 'YMDHM' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5] )
elseif datecode:sub(1,3)=='YMD' then
timeStamp = string.format('%04i-%02i-%02i', datenum[1], datenum[2], datenum[3] )
datecode = 'YMD' -- 'YMD', 'YMDHMS' and 'YMDHM' are the only supported format starting with 'YMD'. All others will be converted to 'YMD'
elseif datecode == 'YM' then
timeStamp = string.format('%04i-%02i', datenum[1], datenum[2] )
elseif datecode:sub(1,1)=='Y' then
timeStamp = string.format('%04i', datenum[1] )
datecode = 'Y'
elseif datecode == 'M' then
timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], 1 )
elseif datecode == 'MD' then
timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], datenum[3] )
else
timeStamp = nil -- format not supported
end
return timeStamp, datecode
end


---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
Line 159: Line 87:
-- Look up proper format string to be passed to {{#time}} parser function
-- Look up proper format string to be passed to {{#time}} parser function
-- INPUTS:
-- INPUTS:
--  * datecode: YMDHMS, YMDHM, YMD, YM, MD, Y, or M
--  * datecode: YMDhms, YMDhm, YMD, YM, Y, MDhms, MDhm, MD, or M
--  * day    : Number between 1 and 31 (not needed for most languages)
--  * day    : Number between 1 and 31 (not needed for most languages)
--  * lang    : language
--  * lang    : language
Line 167: Line 95:
local function parseFormat(dFormat, day)
local function parseFormat(dFormat, day)
if dFormat:find('default') and #dFormat>10 then
if dFormat:find('default') and #dFormat>10 then
-- special (and messy) case of dFormat code depending on a day number
-- Special (and messy) case of dFormat code depending on a day number, where data is a
-- then json contains a string with more json containing "default" field and 2 digit day keys
-- JSON-encoded table {”default”:”*”,”dDD”:”*”} including fields for specific 2-digit days.
-- if desired day is not in that json than use "default" case
-- Change curly double quotes (possibly used for easier editing in tabular data) in dFormat
dFormat = dFormat:gsub('”','"') -- change fancy double quote to a straight one, used for json marking
-- to straight ASCII double quotes (required for parsing of this JSON-encoded table).
local D = mw.text.jsonDecode( dFormat ) --com = mw.dumpObject(D)
local D = mw.text.jsonDecode(mw.ustring.gsub(dFormat, '[„“‟”]', '"')) --com = mw.dumpObject(D)
day = string.format('d%02i',day) -- create day key
-- If the desired day is not in that JSON table, then use its "default" case.
dFormat = D[day] or D.default
dFormat = D[string.format('d%02i', day)] or D.default
dFormat = dFormat:gsub("'", '"') -- change single quote to a double quote, used for {{#time}} marking
            -- Change ASCII single quotes to ASCII double quotes used for {{#time}} marking.
            -- Apostrophes needed in plain-text must not use ASCII single quotes but curly apostrophe
            -- e.g. { ‟default”: ‟j”, ‟d01”: ‟j’'o'” }, not { ‟default”: ‟j”, ‟d01”: ‟j''o'” }.
end
end
dFormat = dFormat:gsub("'", '"')
return dFormat
return dFormat
end
end
Line 185: Line 116:
T[id] = msg
T[id] = msg
end
end
    -- Compatibility of legacy data using 'HMS' or 'HM', where 'M' is ambiguous
    T.YMDhms = T.YMDhms or T.YMDHMS
    T.YMDhm  = T.YMDhm  or T.YMDHM
    datecode = datecode == 'YMDHMS' and 'YMDhms' or datecode == 'YMDHM' and 'YMDhm' or datecode
local dFormat = T[datecode]
local dFormat = T[datecode]
if dFormat=='default' and (datecode=='YMDHMS' or datecode=='YMDHM')  then  
if dFormat == 'default' and (datecode == 'YMDhms' or datecode == 'YMDhm')  then  
-- for most languages adding hour:minute:second is done by adding ", HH:MM:SS to the  
-- For most languages adding hour:minute:second is done by adding ", HH:ii:ss to the  
-- day precission date, those languages are skipped in DateI18n.tab and default to  
-- day precission date, those languages are skipped in DateI18n.tab and default to  
-- English which stores word "default"
-- English which stores word "default"
dFormat = parseFormat(T['YMD'], day).. ', H:i'
dFormat = parseFormat(T['YMD'], day).. ', H:i'
if datecode=='YMDHMS' then
if datecode == 'YMDhms' then
dFormat = dFormat .. ':s'
dFormat = dFormat .. ':s'
end
end
Line 209: Line 145:
--  * dFormat : input to {{#time}} function
--  * dFormat : input to {{#time}} function
local function MonthCase(month, case, lang)
local function MonthCase(month, case, lang)
if month == nil or case == nil then
return nil
end
local T = {{},{},{},{},{},{},{},{},{},{},{},{}}
local T = {{},{},{},{},{},{},{},{},{},{},{},{}}
local tab = mw.ext.data.get('I18n/MonthCases.tab', lang)
local tab = mw.ext.data.get('I18n/MonthCases.tab', lang)
Line 222: Line 161:
-- ==================================================
-- ==================================================
local p = {}
local p = {}
-- ===========================================================================
-- === Functions accesible from the outside to allow unit-testing
-- === Please do not use directly as they could change in the future
-- ===========================================================================
---------------------------------------------------------------------------------------
-- Single string replacement that ignores part of the string in "..."
function p.strReplace(String, old, new)
if String:find('"') then
local T={}
for i, str in ipairs(mw.text.split( String, '"', true )) do
if i%2==1 then
str = str:gsub(old, new, 1)
end
table.insert(T, str)
end
return table.concat(T,'"')
else
return String:gsub(old, new, 1)
end
end
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datevec - Array of {year,month,day,hour,minute,second, tzhour, tzmin} containing broken
--    down date-time component strings or numbers
-- OUTPUT:
--  * datenum - same array but holding only numbers or nuls
function p.clean_datevec(datevec)
-- create datecode based on which variables are provided and check for out-of-bound values
-- check special case of month provided as a name
local month = datevec[2]
if type(month) == 'string' and month ~= '' and not tonumber(month) then
-- When the month is not a number, check if it's a month name in the project's language.
datevec[2] = mw.getContentLanguage():formatDate('n', month)
end
-- check bounds
local maxval = {  1/0, 12, 31, 23, 59, 59,  23, 59 } -- max values (or  1/0=+inf) for year, month, day, hour, minute, second, tzhour, tzmin
local minval = { -1/0, 01, 01, 00, 00, 00, -23, 00 } -- min values (or -1/0=-inf) for year, month, ...
local datenum  = {} -- date-time encoded as a vector = [year, month, ... , second, tzhour, tzmin]
for i = 1, 8 do
        local val = tonumber(datevec[i])
        if val and val >= minval[i] and val <= maxval[i] then -- These tests work with infinite min/max values.
    datenum[i] = val
end
end
-- leap second
if tonumber(datevec[6]) == 60 then -- leap second '60' is valid only at end of 23:59 UTC, on 30 June or 31 December of specific years
-- datenum[6] = 60
local MDhm = table.concat({unpack(datenum,2,5)}, ',')
    if (MDhm == table.concat({6, 30, 23, 59}, ',')) or (MDhm == table.concat({12, 31, 23, 59}, ',')) then
  datenum[6] = 60
    end
end
return datenum
end
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datenum - Array of {year,month,day,hour,minute,second, tzhour, tzmin} as numbers or nuls
-- OUTPUT:
--  * timeStamp - date string in the format taken by mw.language:formatDate lua function and {{#time}} parser function
--      https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate
--      https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time
--  * datecode - a code specifying content of the array where Y' is year, 'M' is month,
--    'D' is day, 'h' is hour, 'm' minute, 's' is second.
--    Output has to be one of YMDhms, YMDhm, YMD, YM, Y, MDhms, MDhm, MD, M.
function p.getTimestamp(datenum)
-- create datecode based on datenum
local codes  = { 'Y', 'M', 'D', 'h', 'm', 's'}
local datecode = '' -- a string signifying which combination of variables was provided
for i, c in ipairs(codes) do
datecode = datecode .. (datenum[i] and c or '') -- if datenum[i] than append codes[i] to datecode
end
-- create timestamp string (for example 2000-02-20 02:20:20) based on which variables were provided
local timeStamp
    -- date starting by a year
if datecode == 'YMDhms' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5], datenum[6] )
elseif datecode == 'YMDhm' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5] )
elseif datecode:sub(1,3)=='YMD' then
timeStamp = string.format('%04i-%02i-%02i', datenum[1], datenum[2], datenum[3] )
datecode  = 'YMD' -- 'YMDhms', 'YMDhm' and 'YMD' are the only supported format starting with 'YMD'; all others will be converted to 'YMD'.
elseif datecode:sub(1,2) == 'YM' then
timeStamp = string.format('%04i-%02i', datenum[1], datenum[2] )
datecode  = 'YM'
elseif datecode:sub(1,1)=='Y' then
timeStamp = string.format('%04i', datenum[1] )
datecode  = 'Y'
    -- date starting by a month (the implied year is 2000)
elseif datecode== 'MDhms' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', 2000, datenum[2], datenum[3], datenum[4], datenum[5], datenum[6] )
elseif datecode == 'MDhm' then
timeStamp = string.format('%04i-%02i-%02i %02i:%02i', 2000, datenum[2], datenum[3], datenum[4], datenum[5] )
elseif datecode:sub(1,2) == 'MD' then
timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], datenum[3] )
datecode = 'MD' -- 'MDhms', 'MDhm' and 'MD' are the only supported format starting with 'MD'; all others will be converted to 'MD'
elseif datecode:sub(1,1) == 'M' then -- Ambiguous: could mean minutes, but here means month (when parsed as a name/abbrev, not as a number).
timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], 1 )
datecode  = 'M'
    -- other possible but unrecognized formats (e.g. 'DHis', 'DHi', 'D', 'His', 'Hi');
    -- note that 'Dh', 'D', 'h', 's' may eventually work, but not 'm' for minute only, which is ambiguous with 'M' for month only.
else
timeStamp = nil -- format not supported
end
return timeStamp, datecode
end
-- ===========================================================================
-- === Version of the function to be called from other LUA codes
-- ===========================================================================


--[[ ========================================================================================
--[[ ========================================================================================
Line 244: Line 304:
lang = mw.getCurrentFrame():callParserFunction( "int", "lang" ) -- get user's chosen language
lang = mw.getCurrentFrame():callParserFunction( "int", "lang" ) -- get user's chosen language
end
end
if lang == 'be-tarsk' then
if lang == 'be-tarask' then
lang = 'be-x-old'
lang = 'be-x-old'
end
end
-- process datevec and extract timeStamp and datecode strings as well as numeric datenum array
-- process datevec and extract timeStamp and datecode strings as well as numeric datenum array
local datecode,  datenum  = parserDatevec(datevec)
local datenum  = p.clean_datevec(datevec)
local year, month, day = datenum[1], datenum[2], datenum[3]
local year, month, day = datenum[1], datenum[2], datenum[3]
local timeStamp, datecode = getTimestamp(datecode, datenum)
local timeStamp, datecode = p.getTimestamp(datenum)
if not timeStamp then -- something went wrong in parserDatevec
if not timeStamp then -- something went wrong in parserDatevec
return ''
return ''
Line 265: Line 325:
-- phrases as it is done in [[c:Module:Complex date]]
-- phrases as it is done in [[c:Module:Complex date]]
case = case or ''
case = case or ''
if (lang=='qu' or lang=='qug') and (case=='nom') then
if (lang=='qu' or lang=='qug') and case=='nom' then
-- Special case related to Quechua and Kichwa languages. The form in the I18n is
-- Special case related to Quechua and Kichwa languages. The form in the I18n is
--  Genitive case with suffix "pi" added to month names provided by {#time}}
--  Genitive case with suffix "pi" added to month names provided by {#time}}
Line 271: Line 331:
-- see https://commons.wikimedia.org/wiki/Template_talk:Date#Quechua from 2014
-- see https://commons.wikimedia.org/wiki/Template_talk:Date#Quechua from 2014
dFormat = dFormat:gsub('F"pi"', 'F')
dFormat = dFormat:gsub('F"pi"', 'F')
elseif (case=='gen') then
elseif case == 'gen' then
dFormat = strReplace(dFormat, "F", "xg")
dFormat = p.strReplace(dFormat, "F", "xg")
elseif (case=='nom') then
elseif case == 'nom' then
dFormat = strReplace(dFormat, "xg", "F")
dFormat = p.strReplace(dFormat, "xg", "F")
elseif (case ~= '') then
elseif case ~= '' and month ~= nil then
-- see is page [[Data:I18n/MonthCases.tab]] on Commons have name of the month  
-- see is page [[Data:I18n/MonthCases.tab]] on Commons have name of the month  
-- in specific gramatic case in desired language. If we have it than replace  
-- in specific gramatic case in desired language. If we have it than replace  
Line 281: Line 341:
local monthMsg = MonthCase(month, case, lang)
local monthMsg = MonthCase(month, case, lang)
if  monthMsg and monthMsg ~= '' then -- make sure it exists
if  monthMsg and monthMsg ~= '' then -- make sure it exists
dFormat = strReplace(dFormat, 'F',  '"'..monthMsg..'"') -- replace default month with month name we already looked up
dFormat = p.strReplace(dFormat, 'F',  '"'..monthMsg..'"') -- replace default month with month name we already looked up
dFormat = strReplace(dFormat, 'xg', '"'..monthMsg..'"')
dFormat = p.strReplace(dFormat, 'xg', '"'..monthMsg..'"')
end
end
end
end


     -- Translate the date using specified format
     -- Translate the date using specified format.
-- See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate and  
-- See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate and  
-- https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions##time for explanation of the format
-- https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions##time for explanation of the format
local datestr = mw.language.new(lang):formatDate( dFormat, timeStamp) -- same as using {{#time}} parser function
local langObj = mw.language.new(lang)
local datestr = langObj:formatDate(dFormat, timeStamp) -- same as using {{#time}} parser function
-- Special case related to Thai solar calendar: prior to 1940 new-year was at different  
-- Special case related to Thai solar calendar: prior to 1940 new-year was at different time of year,
-- time of year, so just year (datecode=='Y') is ambiguous and is replaced by "YYYY or YYYY" phrase
-- so just year (datecode == 'Y') is ambiguous and is replaced by "YYYY or YYYY" phrase
if lang=='th' and datecode=='Y' and year<=1940 then
if lang=='th' and datecode=='Y' and year<=1940 then
datestr = string.format('%04i หรือ %04i', year+542, year+543 )  
datestr = string.format('%04i หรือ %04i', year+542, year+543 )  
end
end
-- If year<1000 than either keep the date padded to the length of 4 digits or trim it
-- If year < 1000 than either keep the date padded to the length of 4 digits or trim it.
-- decide if the year will stay padded with zeros (for years in 0-999 range)
-- Decide if the year will stay padded with zeros (for years in 0-999 range).
if year and year<1000 then
if year and year < 1000 then
if type(trim_year)=='nil' then
trim_year = yesno(trim_year, trim_year or '100-999')
trim_year = '100-999'
if type(trim_year) == 'string' then
end
-- If `trim_year` not a simple boolean, then it's a range of dates.
local trim = yesno(trim_year,nil) -- convert to boolean
-- For example '100-999' means to pad 1-or-2-digit years to be 4-digit long, while keeping 3-digit years as is.
if trim==nil and type(trim_year)=='string' then
-- if "trim_year" not a simple True/False than it is range of dates
-- for example '100-999' means to pad one and 2 digit years to be 4 digit long, while keeping 3 digit years as is
local YMin, YMax = trim_year:match( '(%d+)-(%d+)' )
local YMin, YMax = trim_year:match( '(%d+)-(%d+)' )
trim = (YMin~=nil and year>=tonumber(YMin) and year<=tonumber(YMax))  
trim_year = YMin and year >= tonumber(YMin) and year <= tonumber(YMax)
end
end
if trim==true then
if trim_year then
datestr = trimYear(datestr, year, lang) -- in datestr replace long year with trimmed one
datestr = trimYear(datestr, year, lang) -- in datestr replace long year with trimmed one
end
end
end
end


-- append timezone if present
-- Append a timezone if present (after the hour and minute of the day).
if datenum[7] and (datecode == 'YMDHMS' or datecode == 'YMDHM') then
if datenum[7] and (datecode:sub(1, 5) == 'YMDhm' or datecode:sub(1, 4) == 'MDhm') then
-- use {{#time}} parser function to create timezone string, so that we use correct character set
-- Use {{#time}} parser function to create timezone string, so that we use the correct character set.
local sign = (datenum[7]<0) and '−' or '+'
local sign = (datenum[7]<0) and '−' or '+'
timeStamp = string.format("2000-01-01 %02i:%02i:00", math.abs(datenum[7]), datenum[8] or 0)
timeStamp = string.format("2000-01-01 %02i:%02i:00", math.abs(datenum[7]), datenum[8] or 0)
local timezone = mw.language.new(lang):formatDate( 'H:i', timeStamp) -- same as using {{#time}} parser function
local timezone = langObj:formatDate('H:i', timeStamp) -- same as using {{#time}} parser function
datestr = string.format("%s %s%s", datestr, sign, timezone )
datestr = string.format("%s %s%s", datestr, sign, timezone )
end
end


-- html formating and tagging of date string
-- HTML formating of date string and tagging for microformats (only for absolute dates with a year).
if class and class ~= '' and datecode~='M' and datecode~='MD'then
if class and class ~= '' and class ~= '-' and datecode:sub(1,1) == 'Y' then  
local DateHtmlTags = '<span style="white-space:nowrap"><time class="%s" datetime="%s">%s</time></span>'
local pat = '<time class="%s" datetime="%s" lang="%s" dir="%s" style="white-space:nowrap">%s</time>'
datestr = DateHtmlTags:format(class, timeStamp, datestr)
datestr = pat:format(class, timeStamp, lang, langObj:getDir(), datestr)
end
end
return datestr
return datestr
end
end
-- ===========================================================================
-- === Versions of the function to be called from template namespace
-- ===========================================================================


--[[ ========================================================================================
--[[ ========================================================================================

Revision as of 13:20, 30 October 2022

Documentation for this module may be created at Module:DateI18n/doc

--[[  
  __  __           _       _        ____        _       ___ _  ___        
 |  \/  | ___   __| |_   _| | ___ _|  _ \  __ _| |_ ___|_ _/ |( _ ) _ __  
 | |\/| |/ _ \ / _` | | | | |/ _ (_) | | |/ _` | __/ _ \| || |/ _ \| '_ \ 
 | |  | | (_) | (_| | |_| | |  __/_| |_| | (_| | ||  __/| || | (_) | | | |
 |_|  |_|\___/ \__,_|\__,_|_|\___(_)____/ \__,_|\__\___|___|_|\___/|_| |_|
  
This module is intended for processing of date strings.

Please do not modify this code without applying the changes first at Module:Date/sandbox and testing 
at Module:Date/sandbox/testcases and Module talk:Date/sandbox/testcases.

Authors and maintainers:
* User:Parent5446 - original version of the function mimicking template:ISOdate
* User:Jarekt - original version of the functions mimicking template:Date 
]]

-- =======================================
-- === Dependencies ======================
-- =======================================

require('strict')

-- =======================================
-- === Local Functions ===================
-- =======================================

------------------------------------------------------------------------------
--[[ (copied from Module:Core)
Function allowing for consistent treatment of boolean-like wikitext input.
Inputs:
  1) val - value to be evaluated, outputs as a function of values:
		true  : true  (boolean), 1 (number), or strings: "yes", "y", "true", "1"
		false : false (boolean), 0 (number), or strings: "no", "n", "false", "0"
  2) default - value to return otherwise
See Also: It works similarly to Module:Yesno
]]
local function yesno(val, default)
	if type(val) == 'boolean' then
		return val
	elseif type(val) == 'number' then
		val = tostring(val)
	end
	if type(val) == 'string' then
		local LUT = {
			yes=true , y=true , ['true'] =true , t=true , ['1']=true , on =true,
			no =false, n=false, ['false']=false, f=false, ['0']=false, off=false }
	    val = LUT[mw.ustring.lower(val)]  -- put in lower case
	    if (val~=nil) then
			return val
		end
    end
    return default
end


---------------------------------------------------------------------------------------
-- trim leading zeros in years prior to year 1000
-- INPUT:
--  * datestr   - translated date string 
--  * lang      - language of translation
-- OUTPUT:
--  * datestr - updated date string 

local function trimYear(datestr, year, lang)
	local yearStr0, yearStr1, yearStr2, zeroStr
	yearStr0 = string.format('%04i', year ) -- 4 digit year in standard form "0123"
	yearStr1 = mw.language.new(lang):formatDate( 'Y', yearStr0) -- same as calling {{#time}} parser function
	--yearStr1 = mw.getCurrentFrame():callParserFunction( "#time", { 'Y', yearStr0, lang } ) -- translate to a language 
	if yearStr0==yearStr1 then -- most of languages use standard form of year 
		yearStr2 = tostring(year)
	else -- some languages use different characters for numbers
		yearStr2 = yearStr1
		zeroStr = mw.ustring.sub(yearStr1,1,1) -- get "0" in whatever language
		for i=1,3 do -- trim leading zeros
			if mw.ustring.sub(yearStr2,1,1)==zeroStr then
				yearStr2 = mw.ustring.sub(yearStr2, 2, 5-i)
			else
				break
			end
		end
	end
	return string.gsub(datestr, yearStr1, yearStr2 ) -- in datestr replace long year with trimmed one
end

---------------------------------------------------------------------------------------
-- Look up proper format string to be passed to {{#time}} parser function
-- INPUTS:
--  * datecode: YMDhms, YMDhm, YMD, YM, Y, MDhms, MDhm, MD, or M
--  * day     : Number between 1 and 31 (not needed for most languages)
--  * lang    : language
-- OUTPUT:
--  * dFormat : input to {{#time}} function
local function getDateFormat(datecode, day, lang)
	local function parseFormat(dFormat, day)
		if dFormat:find('default') and #dFormat>10 then
			-- Special (and messy) case of dFormat code depending on a day number, where data is a
			-- JSON-encoded table {”default”:”*”,”dDD”:”*”} including fields for specific 2-digit days.
			-- Change curly double quotes (possibly used for easier editing in tabular data) in dFormat
			-- to straight ASCII double quotes (required for parsing of this JSON-encoded table).
			local D = mw.text.jsonDecode(mw.ustring.gsub(dFormat, '[„“‟”]', '"')) --com = mw.dumpObject(D)
			-- If the desired day is not in that JSON table, then use its "default" case.
			dFormat = D[string.format('d%02i', day)] or D.default
            -- Change ASCII single quotes to ASCII double quotes used for {{#time}} marking.
            -- Apostrophes needed in plain-text must not use ASCII single quotes but curly apostrophe
            -- e.g. { ‟default”: ‟j”, ‟d01”: ‟j’'o'” }, not { ‟default”: ‟j”, ‟d01”: ‟j''o'” }.
		end
		dFormat = dFormat:gsub("'", '"')
		return dFormat
	end
	
	local T = {}
	local tab = mw.ext.data.get('DateI18n.tab', lang)
	for _, row in pairs(tab.data) do -- convert the output into a dictionary table
		local id, _, msg = unpack(row)
		T[id] = msg
	end
    -- Compatibility of legacy data using 'HMS' or 'HM', where 'M' is ambiguous
    T.YMDhms = T.YMDhms or T.YMDHMS
    T.YMDhm  = T.YMDhm  or T.YMDHM
    datecode = datecode == 'YMDHMS' and 'YMDhms' or datecode == 'YMDHM' and 'YMDhm' or datecode

	local dFormat = T[datecode]
	if dFormat == 'default' and (datecode == 'YMDhms' or datecode == 'YMDhm')  then 
		-- For most languages adding hour:minute:second is done by adding ", HH:ii:ss to the 
		-- day precission date, those languages are skipped in DateI18n.tab and default to 
		-- English which stores word "default"
		dFormat = parseFormat(T['YMD'], day).. ', H:i'
		if datecode == 'YMDhms' then
			dFormat = dFormat .. ':s'
		end
	else
		dFormat = parseFormat(dFormat, day)
	end
	return dFormat
end

---------------------------------------------------------------------------------------
-- Look up proper format string to be passed to {{#time}} parser function
-- INPUTS:
--  * month : month number
--  * case  : gramatic case abbriviation, like "ins", "loc"
--  * lang  : language
-- OUTPUT:
--  * dFormat : input to {{#time}} function
local function MonthCase(month, case, lang)
	if month == nil or case == nil then
		return nil
	end
	local T = {{},{},{},{},{},{},{},{},{},{},{},{}}
	local tab = mw.ext.data.get('I18n/MonthCases.tab', lang)
	for _, row in pairs(tab.data) do
		local mth, cs, msg = unpack(row)
		T[mth][cs] = msg
	end
	return T[month][case]
end

-- ==================================================
-- === External functions ===========================
-- ==================================================
local p = {}

-- ===========================================================================
-- === Functions accesible from the outside to allow unit-testing
-- === Please do not use directly as they could change in the future
-- ===========================================================================

---------------------------------------------------------------------------------------
-- Single string replacement that ignores part of the string in "..."
function p.strReplace(String, old, new)
	if String:find('"') then
		local T={}
		for i, str in ipairs(mw.text.split( String, '"', true )) do
			if i%2==1 then
				str = str:gsub(old, new, 1)
			end
			table.insert(T, str)
		end
		return table.concat(T,'"')
	else
		return String:gsub(old, new, 1)
	end
end

---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datevec - Array of {year,month,day,hour,minute,second, tzhour, tzmin} containing broken 
--    down date-time component strings or numbers
-- OUTPUT:
--  * datenum - same array but holding only numbers or nuls
function p.clean_datevec(datevec)
	-- create datecode based on which variables are provided and check for out-of-bound values
	
	-- check special case of month provided as a name
	local month = datevec[2]
	if type(month) == 'string' and month ~= '' and not tonumber(month) then
		-- When the month is not a number, check if it's a month name in the project's language.
		datevec[2] = mw.getContentLanguage():formatDate('n', month)
	end
		
	-- check bounds
	local maxval = {  1/0, 12, 31, 23, 59, 59,  23, 59 } -- max values (or  1/0=+inf) for year, month, day, hour, minute, second, tzhour, tzmin
	local minval = { -1/0, 01, 01, 00, 00, 00, -23, 00 } -- min values (or -1/0=-inf) for year, month, ...
	local datenum  = {} -- date-time encoded as a vector = [year, month, ... , second, tzhour, tzmin]
	for i = 1, 8 do
        local val = tonumber(datevec[i])
        if val and val >= minval[i] and val <= maxval[i] then -- These tests work with infinite min/max values.
		    datenum[i] = val
		end
	end
	
	-- leap second
	if tonumber(datevec[6]) == 60 then -- leap second '60' is valid only at end of 23:59 UTC, on 30 June or 31 December of specific years
--		datenum[6] = 60 
		local MDhm = table.concat({unpack(datenum,2,5)}, ',')
	    if (MDhm == table.concat({6, 30, 23, 59}, ',')) or (MDhm == table.concat({12, 31, 23, 59}, ',')) then
		   datenum[6] = 60 
	    end
	end
	
	return datenum
end
	
---------------------------------------------------------------------------------------
-- process datevec
-- INPUT:
--  * datenum - Array of {year,month,day,hour,minute,second, tzhour, tzmin} as numbers or nuls
-- OUTPUT:
--  * timeStamp - date string in the format taken by mw.language:formatDate lua function and {{#time}} parser function
--       https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate
--       https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time
--  * datecode - a code specifying content of the array where Y' is year, 'M' is month,
--     'D' is day, 'h' is hour, 'm' minute, 's' is second.
--     Output has to be one of YMDhms, YMDhm, YMD, YM, Y, MDhms, MDhm, MD, M.
function p.getTimestamp(datenum)

	-- create datecode based on datenum
	local codes  = { 'Y', 'M', 'D', 'h', 'm', 's'} 
	local datecode = '' -- a string signifying which combination of variables was provided
	for i, c in ipairs(codes) do
		datecode = datecode .. (datenum[i] and c or '') -- if datenum[i] than append codes[i] to datecode
	end

	-- create timestamp string (for example 2000-02-20 02:20:20) based on which variables were provided
	local timeStamp
    -- date starting by a year
	if datecode == 'YMDhms' then
		timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5], datenum[6] )
	elseif datecode == 'YMDhm' then
		timeStamp = string.format('%04i-%02i-%02i %02i:%02i', datenum[1], datenum[2], datenum[3], datenum[4], datenum[5] )
	elseif datecode:sub(1,3)=='YMD' then
		timeStamp = string.format('%04i-%02i-%02i', datenum[1], datenum[2], datenum[3] )
		datecode  = 'YMD' -- 'YMDhms', 'YMDhm' and 'YMD' are the only supported format starting with 'YMD'; all others will be converted to 'YMD'.
	elseif datecode:sub(1,2) == 'YM' then
		timeStamp = string.format('%04i-%02i', datenum[1], datenum[2] )
		datecode  = 'YM' 
	elseif datecode:sub(1,1)=='Y' then
		timeStamp = string.format('%04i', datenum[1] )
		datecode  = 'Y' 
    -- date starting by a month (the implied year is 2000)
	elseif datecode== 'MDhms' then
		timeStamp = string.format('%04i-%02i-%02i %02i:%02i:%02i', 2000, datenum[2], datenum[3], datenum[4], datenum[5], datenum[6] )
	elseif datecode == 'MDhm' then
		timeStamp = string.format('%04i-%02i-%02i %02i:%02i', 2000, datenum[2], datenum[3], datenum[4], datenum[5] )
	elseif datecode:sub(1,2) == 'MD' then
		timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], datenum[3] )
		datecode = 'MD' -- 'MDhms', 'MDhm' and 'MD' are the only supported format starting with 'MD'; all others will be converted to 'MD'
	elseif datecode:sub(1,1) == 'M' then -- Ambiguous: could mean minutes, but here means month (when parsed as a name/abbrev, not as a number).
		timeStamp = string.format('%04i-%02i-%02i', 2000, datenum[2], 1 )
		datecode  = 'M' 
    -- other possible but unrecognized formats (e.g. 'DHis', 'DHi', 'D', 'His', 'Hi');
    -- note that 'Dh', 'D', 'h', 's' may eventually work, but not 'm' for minute only, which is ambiguous with 'M' for month only.
	else
		timeStamp = nil -- format not supported
	end
	return timeStamp, datecode
end

-- ===========================================================================
-- === Version of the function to be called from other LUA codes
-- ===========================================================================

--[[ ========================================================================================
Date
 
This function is the core part of the ISOdate template. 
 
Usage:
  local Date = require('Module:DateI18n')._Date
  local dateStr = Date({2020, 12, 30, 12, 20, 11}, lang)
 
Parameters:
  * {year,month,day,hour,minute,second, tzhour, tzmin}: broken down date-time component strings or numbers
		tzhour, tzmin are timezone offsets from UTC, hours and minutes
  * lang: The language to display it in
  * case: Language format (genitive, etc.) for some languages
  * class: CSS class for the <time> node, use "" for no metadata at all
]]
function p._Date(datevec, lang, case, class, trim_year)	
	-- make sure inputs are in the right format
	if not lang or not mw.language.isValidCode( lang ) then
		lang = mw.getCurrentFrame():callParserFunction( "int", "lang" ) -- get user's chosen language
	end
	if lang == 'be-tarask' then
		lang = 'be-x-old'
	end
	
	-- process datevec and extract timeStamp and datecode strings as well as numeric datenum array
	local datenum  = p.clean_datevec(datevec)
	local year, month, day = datenum[1], datenum[2], datenum[3]
	local timeStamp, datecode = p.getTimestamp(datenum)
	if not timeStamp then -- something went wrong in parserDatevec
		return ''
	end
	-- Commons [[Data:DateI18n.tab]] page stores prefered formats for diferent 
	-- languages and datecodes (specifying year-month-day or just year of month-day, etc)
	-- Look up country specific format input to {{#time}} function
	local dFormat = getDateFormat(datecode, day, lang)

	-- By default the gramatical case is not specified (case=='') allowing the format to be specified 
	-- in [[Data:DateI18n.tab]]. You can overwrite the default grammatical case of the month by 
	-- specifying "case" variable. This is needed mostly by Slavic languages to create more complex 
	-- phrases as it is done in [[c:Module:Complex date]]
	case = case or ''
	if (lang=='qu' or lang=='qug') and case=='nom' then
		-- Special case related to Quechua and Kichwa languages. The form in the I18n is
		--  Genitive case with suffix "pi" added to month names provided by {#time}}
		-- in Nominative case that "pi" should be removed
		-- see https://commons.wikimedia.org/wiki/Template_talk:Date#Quechua from 2014
		dFormat = dFormat:gsub('F"pi"', 'F')
	elseif case == 'gen' then
		dFormat = p.strReplace(dFormat, "F", "xg")
	elseif case == 'nom' then
		dFormat = p.strReplace(dFormat, "xg", "F")
	elseif case ~= '' and month ~= nil then
		-- see is page [[Data:I18n/MonthCases.tab]] on Commons have name of the month 
		-- in specific gramatic case in desired language. If we have it than replace 
		-- "F" and xg" in dFormat
		local monthMsg = MonthCase(month, case, lang)
		if  monthMsg and monthMsg ~= '' then -- make sure it exists
			dFormat = p.strReplace(dFormat, 'F',  '"'..monthMsg..'"') -- replace default month with month name we already looked up
			dFormat = p.strReplace(dFormat, 'xg', '"'..monthMsg..'"')
		end
	end

    -- Translate the date using specified format.
	-- See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#mw.language:formatDate and 
	-- https://www.mediawiki.org/wiki/Help:Extension:ParserFunctions##time for explanation of the format
	local langObj = mw.language.new(lang)
	local datestr = langObj:formatDate(dFormat, timeStamp) -- same as using {{#time}} parser function
	
	-- Special case related to Thai solar calendar: prior to 1940 new-year was at different time of year,
	-- so just year (datecode == 'Y') is ambiguous and is replaced by "YYYY or YYYY" phrase
	if lang=='th' and datecode=='Y' and year<=1940 then
		datestr = string.format('%04i หรือ %04i', year+542, year+543 ) 
	end
	
	-- If year < 1000 than either keep the date padded to the length of 4 digits or trim it.
	-- Decide if the year will stay padded with zeros (for years in 0-999 range).
	if year and year < 1000 then
		trim_year = yesno(trim_year, trim_year or '100-999')
		if type(trim_year) == 'string' then
			-- If `trim_year` not a simple boolean, then it's a range of dates.
			-- For example '100-999' means to pad 1-or-2-digit years to be 4-digit long, while keeping 3-digit years as is.
			local YMin, YMax = trim_year:match( '(%d+)-(%d+)' )
			trim_year = YMin and year >= tonumber(YMin) and year <= tonumber(YMax)
		end
		if trim_year then
			datestr = trimYear(datestr, year, lang) -- in datestr replace long year with trimmed one
		end
	end

	-- Append a timezone if present (after the hour and minute of the day).
	if datenum[7] and (datecode:sub(1, 5) == 'YMDhm' or datecode:sub(1, 4) == 'MDhm') then
		-- Use {{#time}} parser function to create timezone string, so that we use the correct character set.
		local sign = (datenum[7]<0) and '−' or '+'
		timeStamp = string.format("2000-01-01 %02i:%02i:00", math.abs(datenum[7]), datenum[8] or 0)
		local timezone = langObj:formatDate('H:i', timeStamp) -- same as using {{#time}} parser function
		datestr = string.format("%s %s%s", datestr, sign, timezone )
	end

	-- HTML formating of date string and tagging for microformats (only for absolute dates with a year).
	if class and class ~= '' and class ~= '-' and datecode:sub(1,1) == 'Y' then 
		local pat = '<time class="%s" datetime="%s" lang="%s" dir="%s" style="white-space:nowrap">%s</time>'
		datestr = pat:format(class, timeStamp, lang, langObj:getDir(), datestr)
	end
	return datestr
end

-- ===========================================================================
-- === Versions of the function to be called from template namespace
-- ===========================================================================

--[[ ========================================================================================
Date
 
This function is the core part of the ISOdate template. 
 
Usage:
{{#invoke:DateI18n|Date|year=|month=|day=|hour=|minute=|second=|tzhour=|tzmin=|lang=en}}
 
Parameters:
  * year, month, day, hour, minute, second: broken down date-time component strings
  * tzhour, tzmin: timezone offset from UTC, hours and minutes
  * lang: The language to display it in
  * case: Language format (genitive, etc.) for some languages
  * class: CSS class for the <time> node, use "" for no metadata at all
]]
function p.Date(frame)
	local args = {}
	for name, value in pairs( frame.args ) do 
		name = string.gsub( string.lower(name), ' ', '_')
		args[name] = value
	end
	return p._Date(	
		{ args.year, args.month, args.day, args.hour, args.minute, args.second, args.tzhour, args.tzmin },
		args.lang,                  -- language
		args.case,                  -- allows to specify grammatical case for the month for languages that use them
		args.class or 'dtstart',    -- allows to set the html class of the time node where the date is included. This is useful for microformats.
		args.trim_year or '100-999' -- by default pad one and 2 digit years to be 4 digit long, while keeping 3 digit years as is
	)	
end

return p