From 3122c2cdc45a964efedad8953a2df67205c3e3a8 Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod <behdad@behdad.org>
Date: Sat, 4 Dec 2021 19:50:33 -0800
Subject: [PATCH] [buffer] Add HB_GLYPH_FLAG_UNSAFE_TO_CONCAT

Fixes https://github.com/harfbuzz/harfbuzz/issues/1463
Upstream-Status: Backport from [https://github.com/harfbuzz/harfbuzz/commit/3122c2cdc45a964efedad8953a2df67205c3e3a8]
Comment1: To backport the fix for CVE-2023-25193, add defination for HB_GLYPH_FLAG_UNSAFE_TO_CONCAT. This patch is needed along with CVE-2023-25193-pre1.patch for sucessfull porting.
Signed-off-by: Siddharth Doshi <sdoshi@mvista.com>
---
 src/hb-buffer.cc             | 10 ++---
 src/hb-buffer.h              | 76 ++++++++++++++++++++++++++++++------
 src/hb-buffer.hh             | 33 ++++++++++------
 src/hb-ot-layout-gsubgpos.hh | 39 +++++++++++++++---
 src/hb-ot-shape.cc           |  8 +---
 5 files changed, 124 insertions(+), 42 deletions(-)

diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 6131c86..bba5eae 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -610,14 +610,14 @@ done:
 }
 
 void
-hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end)
+hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end, hb_mask_t mask)
 {
   unsigned int cluster = (unsigned int) -1;
   cluster = _unsafe_to_break_find_min_cluster (info, start, end, cluster);
-  _unsafe_to_break_set_mask (info, start, end, cluster);
+  _unsafe_to_break_set_mask (info, start, end, cluster, mask);
 }
 void
-hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end)
+hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end, hb_mask_t mask)
 {
   if (!have_output)
   {
@@ -631,8 +631,8 @@ hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int en
   unsigned int cluster = (unsigned int) -1;
   cluster = _unsafe_to_break_find_min_cluster (out_info, start, out_len, cluster);
   cluster = _unsafe_to_break_find_min_cluster (info, idx, end, cluster);
-  _unsafe_to_break_set_mask (out_info, start, out_len, cluster);
-  _unsafe_to_break_set_mask (info, idx, end, cluster);
+  _unsafe_to_break_set_mask (out_info, start, out_len, cluster, mask);
+  _unsafe_to_break_set_mask (info, idx, end, cluster, mask);
 }
 
 void
diff --git a/src/hb-buffer.h b/src/hb-buffer.h
index d5cb746..42dc92a 100644
--- a/src/hb-buffer.h
+++ b/src/hb-buffer.h
@@ -77,26 +77,76 @@ typedef struct hb_glyph_info_t
  * @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the
  * 				   beginning of the cluster this glyph is part of,
  * 				   then both sides need to be re-shaped, as the
- * 				   result might be different.  On the flip side,
- * 				   it means that when this flag is not present,
- * 				   then it's safe to break the glyph-run at the
- * 				   beginning of this cluster, and the two sides
- * 				   represent the exact same result one would get
- * 				   if breaking input text at the beginning of
- * 				   this cluster and shaping the two sides
- * 				   separately.  This can be used to optimize
- * 				   paragraph layout, by avoiding re-shaping
- * 				   of each line after line-breaking, or limiting
- * 				   the reshaping to a small piece around the
- * 				   breaking point only.
+ * 				   result might be different.
+ *
+ * 				   On the flip side, it means that when this
+ * 				   flag is not present, then it is safe to break
+ * 				   the glyph-run at the beginning of this
+ * 				   cluster, and the two sides will represent the
+ * 				   exact same result one would get if breaking
+ * 				   input text at the beginning of this cluster
+ * 				   and shaping the two sides separately.
+ *
+ * 				   This can be used to optimize paragraph
+ * 				   layout, by avoiding re-shaping of each line
+ * 				   after line-breaking.
+ *
+ * @HB_GLYPH_FLAG_UNSAFE_TO_CONCAT: Indicates that if input text is changed on one
+ * 				   side of the beginning of the cluster this glyph
+ * 				   is part of, then the shaping results for the
+ * 				   other side might change.
+ *
+ * 				   Note that the absence of this flag will NOT by
+ * 				   itself mean that it IS safe to concat text.
+ * 				   Only two pieces of text both of which clear of
+ * 				   this flag can be concatenated safely.
+ *
+ * 				   This can be used to optimize paragraph
+ * 				   layout, by avoiding re-shaping of each line
+ * 				   after line-breaking, by limiting the
+ * 				   reshaping to a small piece around the
+ * 				   breaking positin only, even if the breaking
+ * 				   position carries the
+ * 				   #HB_GLYPH_FLAG_UNSAFE_TO_BREAK or when
+ * 				   hyphenation or other text transformation
+ * 				   happens at line-break position, in the following
+ * 				   way:
+ *
+ * 				   1. Iterate back from the line-break position till
+ * 				   the the first cluster start position that is
+ * 				   NOT unsafe-to-concat, 2. shape the segment from
+ * 				   there till the end of line, 3. check whether the
+ * 				   resulting glyph-run also is clear of the
+ * 				   unsafe-to-concat at its start-of-text position;
+ * 				   if it is, just splice it into place and the line
+ * 				   is shaped; If not, move on to a position further
+ * 				   back that is clear of unsafe-to-concat and retry
+ * 				   from there, and repeat.
+ *
+ * 				   At the start of next line a similar algorithm can
+ * 				   be implemented. A slight complication will arise,
+ * 				   because while our buffer API has a way to
+ * 				   return flags for position corresponding to
+ * 				   start-of-text, there is currently no position
+ * 				   corresponding to end-of-text.  This limitation
+ * 				   can be alleviated by shaping more text than needed
+ * 				   and looking for unsafe-to-concat flag within text
+ * 				   clusters.
+ *
+ * 				   The #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag will
+ * 				   always imply this flag.
+ *
+ * 				   Since: REPLACEME
+ *
  * @HB_GLYPH_FLAG_DEFINED: All the currently defined flags.
  *
  * Since: 1.5.0
  */
 typedef enum { /*< flags >*/
   HB_GLYPH_FLAG_UNSAFE_TO_BREAK		= 0x00000001,
+  HB_GLYPH_FLAG_UNSAFE_TO_CONCAT	= 0x00000002,
 
-  HB_GLYPH_FLAG_DEFINED			= 0x00000001 /* OR of all defined flags */
+  HB_GLYPH_FLAG_DEFINED			= 0x00000003 /* OR of all defined flags */
 } hb_glyph_flags_t;
 
 HB_EXTERN hb_glyph_flags_t
diff --git a/src/hb-buffer.hh b/src/hb-buffer.hh
index b5596d9..beac7b6 100644
--- a/src/hb-buffer.hh
+++ b/src/hb-buffer.hh
@@ -67,8 +67,8 @@ enum hb_buffer_scratch_flags_t {
   HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES		= 0x00000002u,
   HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK		= 0x00000004u,
   HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT		= 0x00000008u,
-  HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK		= 0x00000010u,
-  HB_BUFFER_SCRATCH_FLAG_HAS_CGJ			= 0x00000020u,
+  HB_BUFFER_SCRATCH_FLAG_HAS_CGJ			= 0x00000010u,
+  HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS		= 0x00000020u,
 
   /* Reserved for complex shapers' internal use. */
   HB_BUFFER_SCRATCH_FLAG_COMPLEX0			= 0x01000000u,
@@ -324,8 +324,19 @@ struct hb_buffer_t
       return;
     unsafe_to_break_impl (start, end);
   }
-  HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end);
-  HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end);
+  void unsafe_to_concat (unsigned int start,
+			 unsigned int end)
+  {
+    if (end - start < 2)
+      return;
+    unsafe_to_break_impl (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT);
+  }
+  HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end,
+					 hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT);
+  HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end,
+						   hb_mask_t mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT);
+  void unsafe_to_concat_from_outbuffer (unsigned int start, unsigned int end)
+  { unsafe_to_break_from_outbuffer (start, end, HB_GLYPH_FLAG_UNSAFE_TO_CONCAT); }
 
 
   /* Internal methods */
@@ -377,12 +388,7 @@ struct hb_buffer_t
   set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0)
   {
     if (inf.cluster != cluster)
-    {
-      if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)
-	inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
-      else
-	inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
-    }
+      inf.mask = (inf.mask & ~HB_GLYPH_FLAG_DEFINED) | (mask & HB_GLYPH_FLAG_DEFINED);
     inf.cluster = cluster;
   }
 
@@ -398,13 +404,14 @@ struct hb_buffer_t
   void
   _unsafe_to_break_set_mask (hb_glyph_info_t *infos,
 			     unsigned int start, unsigned int end,
-			     unsigned int cluster)
+			     unsigned int cluster,
+			     hb_mask_t mask)
   {
     for (unsigned int i = start; i < end; i++)
       if (cluster != infos[i].cluster)
       {
-	scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK;
-	infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
+	scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS;
+	infos[i].mask |= mask;
       }
   }
 
diff --git a/src/hb-ot-layout-gsubgpos.hh b/src/hb-ot-layout-gsubgpos.hh
index 579d178..a6ca456 100644
--- a/src/hb-ot-layout-gsubgpos.hh
+++ b/src/hb-ot-layout-gsubgpos.hh
@@ -369,7 +369,7 @@ struct hb_ot_apply_context_t :
     may_skip (const hb_glyph_info_t &info) const
     { return matcher.may_skip (c, info); }
 
-    bool next ()
+    bool next (unsigned *unsafe_to = nullptr)
     {
       assert (num_items > 0);
       while (idx + num_items < end)
@@ -392,11 +392,17 @@ struct hb_ot_apply_context_t :
 	}
 
 	if (skip == matcher_t::SKIP_NO)
+	{
+	  if (unsafe_to)
+	    *unsafe_to = idx + 1;
 	  return false;
+	}
       }
+      if (unsafe_to)
+        *unsafe_to = end;
       return false;
     }
-    bool prev ()
+    bool prev (unsigned *unsafe_from = nullptr)
     {
       assert (num_items > 0);
       while (idx > num_items - 1)
@@ -419,8 +425,14 @@ struct hb_ot_apply_context_t :
 	}
 
 	if (skip == matcher_t::SKIP_NO)
+	{
+	  if (unsafe_from)
+	    *unsafe_from = hb_max (1u, idx) - 1u;
 	  return false;
+	}
       }
+      if (unsafe_from)
+        *unsafe_from = 0;
       return false;
     }
 
@@ -834,7 +846,12 @@ static inline bool match_input (hb_ot_apply_context_t *c,
   match_positions[0] = buffer->idx;
   for (unsigned int i = 1; i < count; i++)
   {
-    if (!skippy_iter.next ()) return_trace (false);
+    unsigned unsafe_to;
+    if (!skippy_iter.next (&unsafe_to))
+    {
+      c->buffer->unsafe_to_concat (c->buffer->idx, unsafe_to);
+      return_trace (false);
+    }
 
     match_positions[i] = skippy_iter.idx;
 
@@ -1022,8 +1039,14 @@ static inline bool match_backtrack (hb_ot_apply_context_t *c,
   skippy_iter.set_match_func (match_func, match_data, backtrack);
 
   for (unsigned int i = 0; i < count; i++)
-    if (!skippy_iter.prev ())
+  {
+    unsigned unsafe_from;
+    if (!skippy_iter.prev (&unsafe_from))
+    {
+      c->buffer->unsafe_to_concat_from_outbuffer (unsafe_from, c->buffer->idx);
       return_trace (false);
+    }
+  }
 
   *match_start = skippy_iter.idx;
 
@@ -1045,8 +1068,14 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c,
   skippy_iter.set_match_func (match_func, match_data, lookahead);
 
   for (unsigned int i = 0; i < count; i++)
-    if (!skippy_iter.next ())
+  {
+    unsigned unsafe_to;
+    if (!skippy_iter.next (&unsafe_to))
+    {
+      c->buffer->unsafe_to_concat (c->buffer->idx + offset, unsafe_to);
       return_trace (false);
+    }
+  }
 
   *end_index = skippy_iter.idx + 1;
 
diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc
index 5d9a70c..5d10b30 100644
--- a/src/hb-ot-shape.cc
+++ b/src/hb-ot-shape.cc
@@ -1008,7 +1008,7 @@ hb_propagate_flags (hb_buffer_t *buffer)
   /* Propagate cluster-level glyph flags to be the same on all cluster glyphs.
    * Simplifies using them. */
 
-  if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK))
+  if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS))
     return;
 
   hb_glyph_info_t *info = buffer->info;
@@ -1017,11 +1017,7 @@ hb_propagate_flags (hb_buffer_t *buffer)
   {
     unsigned int mask = 0;
     for (unsigned int i = start; i < end; i++)
-      if (info[i].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)
-      {
-	 mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
-	 break;
-      }
+      mask |= info[i].mask & HB_GLYPH_FLAG_DEFINED;
     if (mask)
       for (unsigned int i = start; i < end; i++)
 	info[i].mask |= mask;
-- 
2.25.1