From 4d9c809355b574f2a58eac119f5e076c48e4d1e2 Mon Sep 17 00:00:00 2001
From: Ulya Trofimovich <skvadrik@gmail.com>
Date: Thu, 23 Apr 2020 22:16:51 +0100
Subject: [PATCH] Rewrite recursion into iteration (nullable RE).

This is to avoid stack overflow on large RE (especially on instrumented
builds that have larger stack frames, like AddressSanitizer).

Partial fix for #219 "overflow-1.re test fails on system with small stack".

Upstream-Status: Backport [https://github.com/skvadrik/re2c/commit/4d9c809355b574f2a58eac119f5e076c48e4d1e2]

CVE: CVE-2018-21232

Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
---
diff --git a/src/re/nullable.cc b/src/re/nullable.cc
--- a/src/re/nullable.cc	(revision e58939b34bb4c37cd990f82dc286f21cb405743e)
+++ b/src/re/nullable.cc	(date 1647253886226)
@@ -9,43 +9,100 @@
 #include "src/re/tag.h"
 
 namespace re2c {
+    namespace {
+
+        struct StackItem {
+            const RE *re;   // current sub-RE
+            uint8_t   succ; // index of the next successor to be visited
+        };
 
-static bool nullable(const RESpec &spec, const RE *re, bool &trail)
-{
-	if (trail) return true;
+        static bool nullable(const RESpec &spec, std::vector<StackItem> &stack, const RE *re0)
+        {
+            // the "nullable" status of the last sub-RE visited by DFS
+            bool null = false;
 
-	switch (re->type) {
-		case RE::NIL: return true;
-		case RE::SYM: return false;
-		case RE::ITER:
-			return nullable(spec, re->iter.re, trail);
-		case RE::TAG:
-			trail |= trailing(spec.tags[re->tag.idx]);
-			return true;
-		case RE::ALT:
-			return nullable(spec, re->alt.re1, trail)
-				|| nullable(spec, re->alt.re2, trail);
-		case RE::CAT:
-			return nullable(spec, re->cat.re1, trail)
-				&& nullable(spec, re->cat.re2, trail);
-	}
-	return false; /* unreachable */
-}
+            const StackItem i0 = {re0, 0};
+            stack.push_back(i0);
+
+            while (!stack.empty()) {
+                const StackItem i = stack.back();
+                stack.pop_back();
+
+                const RE *re = i.re;
+                if (re->type == RE::NIL) {
+                    null = true;
+                }
+                else if (re->type == RE::SYM) {
+                    null = false;
+                }
+                else if (re->type == RE::TAG) {
+                    null = true;
 
-/*
- * warn about rules that match empty string
- * (including rules with nonempty trailing context)
- * false positives on partially self-shadowed rules like [^]?
- */
-void warn_nullable(const RESpec &spec, const std::string &cond)
-{
-	const size_t nre = spec.res.size();
-	for (size_t i = 0; i < nre; ++i) {
-		bool trail = false;
-		if (nullable(spec, spec.res[i], trail)) {
-			spec.warn.match_empty_string(spec.rules[i].code->fline, cond);
-		}
-	}
-}
+                    // Trailing context is always in top-level concatenation, and sub-RE
+                    // are visited from left to right. Since we are here, sub-RE to the
+                    // left of the trailing context is nullable (otherwise we would not
+                    // recurse into the right sub-RE), therefore the whole RE is nullable.
+                    if (trailing(spec.tags[re->tag.idx])) {
+                        //DASSERT(stack.size() == 1 && stack.back().re->type == RE::CAT);
+                        stack.pop_back();
+                        break;
+                    }
+                }
+                else if (re->type == RE::ALT) {
+                    if (i.succ == 0) {
+                        // recurse into the left sub-RE
+                        StackItem k = {re, 1};
+                        stack.push_back(k);
+                        StackItem j = {re->alt.re1, 0};
+                        stack.push_back(j);
+                    }
+                    else if (!null) {
+                        // if the left sub-RE is nullable, so is alternative, so stop
+                        // recursion; otherwise recurse into the right sub-RE
+                        StackItem j = {re->alt.re2, 0};
+                        stack.push_back(j);
+                    }
+                }
+                else if (re->type == RE::CAT) {
+                    if (i.succ == 0) {
+                        // recurse into the left sub-RE
+                        StackItem k = {re, 1};
+                        stack.push_back(k);
+                        StackItem j = {re->cat.re1, 0};
+                        stack.push_back(j);
+                    }
+                    else if (null) {
+                        // if the left sub-RE is not nullable, neither is concatenation,
+                        // so stop recursion; otherwise recurse into the right sub-RE
+                        StackItem j = {re->cat.re2, 0};
+                        stack.push_back(j);
+                    }
+                }
+                else if (re->type == RE::ITER) {
+                    // iteration is nullable if the sub-RE is nullable
+                    // (zero repetitions is represented with alternative)
+                    StackItem j = {re->iter.re, 0};
+                    stack.push_back(j);
+                }
+            }
+
+            //DASSERT(stack.empty());
+            return null;
+        }
+
+    } // anonymous namespace
+
+// Warn about rules that match the empty string (including rules with nonempty
+// trailing context). Gives false positives on partially self-shadowed rules like [^]?
+    void warn_nullable(const RESpec &spec, const std::string &cond)
+    {
+        std::vector<StackItem> stack;
+        const size_t nre = spec.res.size();
+        for (size_t i = 0; i < nre; ++i) {
+            if (nullable(spec, stack, spec.res[i])) {
+                spec.warn.match_empty_string(spec.rules[i].code->fline, cond);
+            }
+        }
+    }
 
 } // namespace re2c