From 814efb86145da1d5579a3d4cfedff17cc03d622b Mon Sep 17 00:00:00 2001
From: Ivan Goncharov <ivan.goncharov.ua@gmail.com>
Date: Thu, 28 Jul 2022 15:09:58 +0300
Subject: [PATCH 1/2] parser: limit maximum number of tokens

Motivation: Parser CPU and memory usage is linear in the number of tokens in a
document; however, in extreme cases it becomes quadratic due to memory
exhaustion. On my machine this happens on queries with 2k tokens.
For example:
```
{ a a <repeat 2k times> a }
```
It takes 741ms on my machine.
But if we create a document of the same size with a smaller number of
tokens, it is a lot faster.
Example:
```
{ a(arg: "a <repeat 2k times> a") }
```
Now it takes only 17ms to process, which is 43 times faster.

That means that if we limit document size, we have to make this limit small,
since it takes only two bytes to create a token, e.g. ` a`.
But that would hurt legitimate documents that have long tokens in them
(comments, descriptions, strings, long names, etc.).

That's why this PR adds a mechanism to limit the number of tokens in a
parsed document.
The exact same mechanism is also implemented in graphql-java, see:
https://github.com/graphql-java/graphql-java/pull/2549

I also tried an alternative approach of counting nodes, and it gives a
slightly better approximation of how many resources would be consumed.
However, compared to tokens, AST nodes are an implementation detail of
graphql-js, so it's impossible to replicate in other implementations
(e.g. to count this number on a client).
---
 src/language/__tests__/parser-test.ts | 13 ++++++++
 src/language/parser.ts                | 43 ++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/src/language/__tests__/parser-test.ts b/src/language/__tests__/parser-test.ts
index 651da1cab1..cb7222048f 100644
--- a/src/language/__tests__/parser-test.ts
+++ b/src/language/__tests__/parser-test.ts
@@ -88,6 +88,19 @@ describe('Parser', () => {
     `);
   });
 
+  it('limit maximum number of tokens', () => {
+    expect(() => parse('{ foo }', { maxTokens: 3 })).to.not.throw();
+    expect(() => parse('{ foo }', { maxTokens: 2 })).to.throw(
+      'Syntax Error: Document contains more that 2 tokens. Parsing aborted.',
+    );
+
+    expect(() => parse('{ foo(bar: "baz") }', { maxTokens: 8 })).to.not.throw();
+
+    expect(() => parse('{ foo(bar: "baz") }', { maxTokens: 7 })).to.throw(
+      'Syntax Error: Document contains more that 7 tokens. Parsing aborted.',
+    );
+  });
+
   it('parses variable inline values', () => {
     expect(() =>
       parse('{ field(complex: { a: { b: [ $var ] } }) }'),
diff --git a/src/language/parser.ts b/src/language/parser.ts
index d74384efd9..01296aa6f3 100644
--- a/src/language/parser.ts
+++ b/src/language/parser.ts
@@ -82,6 +82,15 @@ export interface ParseOptions {
    */
   noLocation?: boolean | undefined;
 
+  /**
+   * Parser CPU and memory usage is linear to the number of tokens in a document
+   * however in extreme cases it becomes quadratic due to memory exhaustion.
+   * Parsing happens before validation so even invalid queries can burn a lots of
+   * CPU time and memory.
+   * To prevent this you can set limit on maximum number of tokens.
+   */
+  maxTokens?: number | undefined;
+
   /**
    * @deprecated will be removed in the v17.0.0
    *
@@ -206,12 +215,14 @@ export function parseType(
 export class Parser {
   protected _options: ParseOptions;
   protected _lexer: Lexer;
+  protected _tokenCounter: number;
 
   constructor(source: string | Source, options: ParseOptions = {}) {
     const sourceObj = isSource(source) ? source : new Source(source);
 
     this._lexer = new Lexer(sourceObj);
     this._options = options;
+    this._tokenCounter = 0;
   }
 
   /**
@@ -634,13 +645,13 @@ export class Parser {
       case TokenKind.BRACE_L:
         return this.parseObject(isConst);
       case TokenKind.INT:
-        this._lexer.advance();
+        this.advanceLexer();
         return this.node<IntValueNode>(token, {
           kind: Kind.INT,
           value: token.value,
         });
       case TokenKind.FLOAT:
-        this._lexer.advance();
+        this.advanceLexer();
         return this.node<FloatValueNode>(token, {
           kind: Kind.FLOAT,
           value: token.value,
@@ -649,7 +660,7 @@ export class Parser {
       case TokenKind.BLOCK_STRING:
         return this.parseStringLiteral();
       case TokenKind.NAME:
-        this._lexer.advance();
+        this.advanceLexer();
         switch (token.value) {
           case 'true':
             return this.node<BooleanValueNode>(token, {
@@ -695,7 +706,7 @@ export class Parser {
 
   parseStringLiteral(): StringValueNode {
     const token = this._lexer.token;
-    this._lexer.advance();
+    this.advanceLexer();
     return this.node<StringValueNode>(token, {
       kind: Kind.STRING,
       value: token.value,
@@ -1479,7 +1490,7 @@ export class Parser {
   expectToken(kind: TokenKind): Token {
     const token = this._lexer.token;
     if (token.kind === kind) {
-      this._lexer.advance();
+      this.advanceLexer();
       return token;
     }
 
@@ -1497,7 +1508,7 @@ export class Parser {
   expectOptionalToken(kind: TokenKind): boolean {
     const token = this._lexer.token;
     if (token.kind === kind) {
-      this._lexer.advance();
+      this.advanceLexer();
       return true;
     }
     return false;
@@ -1510,7 +1521,7 @@ export class Parser {
   expectKeyword(value: string): void {
     const token = this._lexer.token;
     if (token.kind === TokenKind.NAME && token.value === value) {
-      this._lexer.advance();
+      this.advanceLexer();
     } else {
       throw syntaxError(
         this._lexer.source,
@@ -1527,7 +1538,7 @@ export class Parser {
   expectOptionalKeyword(value: string): boolean {
     const token = this._lexer.token;
     if (token.kind === TokenKind.NAME && token.value === value) {
-      this._lexer.advance();
+      this.advanceLexer();
       return true;
     }
     return false;
@@ -1616,6 +1627,22 @@ export class Parser {
     } while (this.expectOptionalToken(delimiterKind));
     return nodes;
   }
+
+  advanceLexer(): void {
+    const { maxTokens } = this._options;
+    const token = this._lexer.advance();
+
+    if (maxTokens !== undefined && token.kind !== TokenKind.EOF) {
+      ++this._tokenCounter;
+      if (this._tokenCounter > maxTokens) {
+        throw syntaxError(
+          this._lexer.source,
+          token.start,
+          `Document contains more that ${maxTokens} tokens. Parsing aborted.`,
+        );
+      }
+    }
+  }
 }
 
 /**

From 876937838fac9e9fb1ac24240fd794c9fe34cc8b Mon Sep 17 00:00:00 2001
From: Ivan Goncharov <ivan.goncharov.ua@gmail.com>
Date: Tue, 2 Aug 2022 13:35:52 +0300
Subject: [PATCH 2/2] Apply suggestions from code review

Co-authored-by: Yaacov Rydzinski  <yaacovCR@gmail.com>
---
 src/language/parser.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/language/parser.ts b/src/language/parser.ts
index 01296aa6f3..e37f02b763 100644
--- a/src/language/parser.ts
+++ b/src/language/parser.ts
@@ -85,9 +85,9 @@ export interface ParseOptions {
   /**
    * Parser CPU and memory usage is linear to the number of tokens in a document
    * however in extreme cases it becomes quadratic due to memory exhaustion.
-   * Parsing happens before validation so even invalid queries can burn a lots of
+   * Parsing happens before validation so even invalid queries can burn lots of
    * CPU time and memory.
-   * To prevent this you can set limit on maximum number of tokens.
+   * To prevent this you can set a maximum number of tokens allowed within a document.
    */
   maxTokens?: number | undefined;