Skip to content
This repository was archived by the owner on Dec 17, 2018. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 26 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,28 @@ the following possible keys:
`'zero', 'one', 'two', 'few', 'many', 'other'`. To disable this check, pass in
an empty array.

- `strictNumberSign` – Inside a `plural` or `selectordinal` statement, a pound
symbol (`#`) is replaced with the input number. By default, `#` is also parsed
as a special character in nested statements too, and can be escaped using
apostrophes (`'#'`). Setting `strictNumberSign` to true will make the parser
follow the ICU MessageFormat spec more closely, and only parse `#` as a
special character directly inside a `plural` or `selectordinal` statement.
Outside those, `#` and `'#'` will be parsed as literal text.

The parser only supports the default `DOUBLE_OPTIONAL` [apostrophe mode]. A
single apostrophe only starts quoted literal text if preceded by a curly brace
(`{}`) or a pound symbol (`#`) inside a `plural` or `selectordinal` statement,
depending on the value of `strictNumberSign`. Otherwise, it is a literal
apostrophe. A double apostrophe is always a literal apostrophe.
- `strict` – By default, the parser applies a few relaxations to the ICU
MessageFormat spec. Setting `strict: true` will disable these relaxations:
- The `argType` of `simpleArg` formatting functions will be restricted to the
set of `number`, `date`, `time`, `spellout`, `ordinal`, and `duration`,
rather than accepting any lower-case identifier that does not start with a
digit.
- The optional `argStyle` of `simpleArg` formatting functions will not be
parsed like regular message text, but instead as the spec requires: "In
argStyleText, every single ASCII apostrophe begins and ends quoted literal
text, and unquoted {curly braces} must occur in matched pairs."
- Inside a `plural` or `selectordinal` statement, a pound symbol (`#`) is
replaced with the input number. By default, `#` is also parsed as a special
character in nested statements, and can be escaped using apostrophes
(`'#'`). In strict mode, `#` will be parsed as a special character only
directly inside a `plural` or `selectordinal` statement. Outside those, `#`
and `'#'` will be parsed as literal text.

The parser only supports the default `DOUBLE_OPTIONAL` [apostrophe mode], in
which a single apostrophe only starts quoted literal text if it immediately
precedes a curly brace (`{` or `}`), or a pound symbol `#` if inside a plural
format. A literal apostrophe `'` is represented by either a single `'` or a
doubled `''` apostrophe character.

[ICU MessageFormat]: https://messageformat.github.io/guide/
[messageformat]: https://messageformat.github.io/
Expand Down Expand Up @@ -130,7 +139,9 @@ type Function = {
type: 'function',
arg: Identifier,
key: Identifier,
param: string | null
param: {
tokens: options.strict ? [string] : (Token | Octothorpe)[]
} | null
}

type PluralCase = {
Expand All @@ -140,7 +151,7 @@ type PluralCase = {

type SelectCase = {
key: Identifier,
tokens: strictNumberSign ? Token[] : (Token | Octothorpe)[]
tokens: options.strict ? Token[] : (Token | Octothorpe)[]
}

type Octothorpe = {
Expand Down
25 changes: 17 additions & 8 deletions parser.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ argument = '{' _ arg:id _ '}' {
};
}

select = '{' _ arg:id _ ',' _ (m:'select' { if (options.strictNumberSign) { inPlural = false; } return m; }) _ ',' _ cases:selectCase+ _ '}' {
select = '{' _ arg:id _ ',' _ (m:'select' { if (options.strict) { inPlural = false; } return m; }) _ ',' _ cases:selectCase+ _ '}' {
return {
type: 'select',
arg: arg,
Expand All @@ -42,7 +42,7 @@ plural = '{' _ arg:id _ ',' _ type:(m:('plural'/'selectordinal') { inPlural = tr
};
}

function = '{' _ arg:id _ ',' _ key:(m:id { if (options.strictNumberSign) { inPlural = false; } return m; }) _ param:functionParam? '}' {
function = '{' _ arg:id _ ',' _ key:functionKey _ param:functionParam? '}' {
return {
type: 'function',
arg: arg,
Expand All @@ -66,12 +66,21 @@ pluralKey
= id
/ '=' d:digits { return d; }

functionParam = _ ',' str:paramChars+ { return str.join(''); }

paramChars
= doubleapos
/ quotedCurly
/ [^}]
functionKey
= 'number' / 'date' / 'time' / 'spellout' / 'ordinal' / 'duration'
/ ! 'select' ! 'plural' ! 'selectordinal' key:id
& { return !options.strict && key.toLowerCase() === key && !/^\d/.test(key) }
{ return key }

functionParam
= _ ',' tokens:token* & { return !options.strict } { return { tokens: tokens } }
/ _ ',' parts:strictFunctionParamPart* { return { tokens: [parts.join('')] } }

strictFunctionParamPart
= p:[^'{}]+ { return p.join('') }
/ doubleapos
/ "'" quoted:inapos "'" { return quoted }
/ '{' p:strictFunctionParamPart* '}' { return '{' + p.join('') + '}' }

doubleapos = "''" { return "'"; }

Expand Down
160 changes: 116 additions & 44 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -230,28 +230,38 @@ describe("Plurals", function() {
});

it("should support quoting", function() {
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].type).to.eql('function');
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].arg).to.eql('x');
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].key).to.eql('date');
// Octothorpe is not special here regardless of strict number sign
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens[0].param).to.eql("y-M-dd # '#'");
expect(parse("{NUM, plural, one{{x,date,y-M-dd # '#'}} two{two}}")[0].cases[0].tokens).to.eql([{
type: 'function', arg: 'x', key: 'date',
param: {
tokens: [ 'y-M-dd ', { type: 'octothorpe' }, ' #' ]
}
}]);
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens).to.eql([
{ type: 'octothorpe' }, " ' ", { type: 'octothorpe' }
]);
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens).to.eql([
{ type: 'octothorpe' }, ' #'
]);
expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens).to.eql([
'one', { type: 'octothorpe' }
]);
})

expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe');
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[1]).to.eql(" ' ");
expect(parse("{NUM, plural, one{# '' #} two{two}}")[0].cases[0].tokens[2].type).to.eql('octothorpe');
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[0].type).to.eql('octothorpe');
expect(parse("{NUM, plural, one{# '#'} two{two}}")[0].cases[0].tokens[1]).to.eql(" #");
describe('options.strict', function() {
var src = "{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}";

expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[0]).to.eql('one');
expect(parse("{NUM, plural, one{one#} two{two}}")[0].cases[0].tokens[1].type).to.eql('octothorpe');
it('should parse # correctly without strict option', function() {
expect(parse(src)[0].cases[0].tokens[2].cases[0].tokens).to.eql([
{ type: 'octothorpe' }, ' # one', { type: 'octothorpe' }
]);
})

// without strict number sign
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[0].type).to.eql('octothorpe')
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[1]).to.eql(' # one')
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}")[0].cases[0].tokens[2].cases[0].tokens[2].type).to.eql('octothorpe')
// with strict number sign
expect(parse("{NUM, plural, one{# {VAR,select,key{# '#' one#}}} two{two}}", { strictNumberSign: true })[0].cases[0].tokens[2].cases[0].tokens[0]).to.eql('# \'#\' one#')
});
it('should parse # correctly with strict option', function() {
expect(parse(src, { strict: true })[0].cases[0].tokens[2].cases[0].tokens).to.eql([
"# '#' one#"
]);
})
})

});
describe("Ordinals", function() {
Expand All @@ -277,43 +287,105 @@ describe("Ordinals", function() {

});
describe("Functions", function() {
it("should accept no parameters", function() {
expect(parse('{var,date}')[0].type).to.eql('function');
expect(parse('{var,date}')[0].key).to.eql('date');
expect(parse('{var,date}')[0].param).to.be.null;
it("should require lower-case type", function() {
expect(function(){ parse('{var,date}'); }).to.not.throwError();
expect(function(){ parse('{var,Date}'); }).to.throwError();
expect(function(){ parse('{var,daTe}'); }).to.throwError();
expect(function(){ parse('{var,9ate}'); }).to.throwError();
})

it('should be gracious with whitespace around arg and key', function() {
var expected = { type: 'function', arg: 'var', key: 'date', param: null }
expect(parse('{var,date}')[0]).to.eql(expected);
expect(parse('{var, date}')[0]).to.eql(expected);
expect(parse('{ var, date }')[0]).to.eql(expected);
expect(parse('{\nvar, \ndate\n}')[0]).to.eql(expected);
})

it("should accept parameters", function() {
expect(parse('{var,date,long}')[0].type).to.eql('function');
expect(parse('{var,date,long}')[0].key).to.eql('date');
expect(parse('{var,date,long}')[0].param).to.eql('long');
expect(parse('{var,date,long,short}')[0].param).to.eql('long,short');
expect(parse('{var,date,long}')[0]).to.eql({
type: 'function', arg: 'var', key: 'date', param: { tokens: ['long'] }
});
expect(parse('{var,date,long,short}')[0].param.tokens).to.eql(['long,short']);
})

it("should accept parameters with whitespace", function() {
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].type).to.eql('function');
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].key).to.eql('date');
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0].param).to.eql('y-M-d HH:mm:ss zzzz');
expect(parse('{var,date, y-M-d HH:mm:ss zzzz }')[0].param).to.eql(' y-M-d HH:mm:ss zzzz ');
expect(parse('{var,date,y-M-d HH:mm:ss zzzz}')[0]).to.eql({
type: 'function', arg: 'var', key: 'date', param: { tokens: ['y-M-d HH:mm:ss zzzz'] }
});
expect(parse('{var,date, y-M-d HH:mm:ss zzzz }')[0].param.tokens).to.eql([' y-M-d HH:mm:ss zzzz ']);
})

it("should accept parameters with special characters", function() {
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].type).to.eql('function');
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].key).to.eql('date');
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0].param).to.eql("y-M-d {,} ' HH:mm:ss zzzz");
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz'}'}")[0].param).to.eql("y-M-d {,} ' HH:mm:ss zzzz}");
expect(parse("{var,date,y-M-d # HH:mm:ss zzzz}")[0].param).to.eql("y-M-d # HH:mm:ss zzzz");
expect(parse("{var,date,y-M-d '#' HH:mm:ss zzzz}")[0].param).to.eql("y-M-d '#' HH:mm:ss zzzz");
expect(parse("{var,date,y-M-d, HH:mm:ss zzzz}")[0].param).to.eql("y-M-d, HH:mm:ss zzzz");
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz}")[0]).to.eql({
type: 'function', arg: 'var', key: 'date', param: { tokens: [ 'y-M-d {,} \' HH:mm:ss zzzz' ] }
});
expect(parse("{var,date,y-M-d '{,}' '' HH:mm:ss zzzz'}'}")[0].param.tokens).to.eql(["y-M-d {,} ' HH:mm:ss zzzz}"]);
expect(parse("{var,date,y-M-d # HH:mm:ss zzzz}")[0].param.tokens).to.eql(["y-M-d # HH:mm:ss zzzz"]);
expect(parse("{var,date,y-M-d '#' HH:mm:ss zzzz}")[0].param.tokens).to.eql(["y-M-d '#' HH:mm:ss zzzz"]);
expect(parse("{var,date,y-M-d, HH:mm:ss zzzz}")[0].param.tokens).to.eql(["y-M-d, HH:mm:ss zzzz"]);
})

it("should be gracious with whitespace around arg and key", function() {
var firstRes = JSON.stringify(parse('{var, date}'));
expect(JSON.stringify(parse('{ var, date }'))).to.eql(firstRes);
expect(JSON.stringify(parse('{var,date}'))).to.eql(firstRes);
expect(JSON.stringify(parse('{\nvar, \ndate\n}'))).to.eql(firstRes);
});
it('should accept parameters containing a basic variable', function() {
expect(parse('{foo, date, {bar}}')[0]).to.eql({
type: 'function',
arg: 'foo',
key: 'date',
param: { tokens: [' ', { arg: 'bar', type: 'argument' }] }
})
})

it('should accept parameters containing a select', function() {
expect(parse('{foo, date, {bar, select, other{baz}}}')[0]).to.eql({
type: 'function',
arg: 'foo',
key: 'date',
param: { tokens: [' ', {
arg: 'bar',
type: 'select',
cases: [{ key: 'other', tokens: ['baz'] }]
}] }
})
})

it('should accept parameters containing a plural', function() {
expect(parse('{foo, date, {bar, plural, other{#}}}')[0]).to.eql({
type: 'function',
arg: 'foo',
key: 'date',
param: { tokens: [' ', {
arg: 'bar',
type: 'plural',
offset: 0,
cases: [{ key: 'other', tokens: [{ type: 'octothorpe' }] }]
}] }
})
})

describe('options.strict', function() {
it('should require known function key with strict option', function() {
expect(function() { parse('{foo, bar}') }).to.not.throwError()
expect(function() { parse('{foo, bar}', { strict: true }) }).to.throwError()
expect(function() { parse('{foo, date}', { strict: true }) }).to.not.throwError()
})

it('parameter parsing should obey strict option', function() {
expect(parse("{foo, date, {bar'}', quote'', other{#}}}", { strict: true })[0]).to.eql({
type: 'function',
arg: 'foo',
key: 'date',
param: { tokens: [" {bar}, quote', other{#}}"] }
})
})

it('should require matched braces in parameter if strict option is set', function() {
expect(function() {
parse("{foo, date, {bar{}}", { strict: true })
}).to.throwError();
})
})
});

describe("Nested/Recursive blocks", function() {

it("should allow a select statement inside of a select statement", function() {
Expand Down