fix: strip HTML-encoded tags (decode entities before stripping)

The previous stripHtml decoded &lt;/&gt; only after the tag-stripping regex pass, so content stored as &lt;p&gt;text&lt;/p&gt; was never stripped. Now entities are decoded first, then all tags are removed. Also strip HTML when prefilling compose from an existing post (Edit/Repost) so the text field shows clean content, not raw markup. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-10 17:01:15 +02:00
parent 365f44dbe4
commit 4a531df8bd
2 changed files with 13 additions and 8 deletions
@@ -1,14 +1,19 @@
 /** Replacement text for each entity `stripHtml` decodes, keyed by the name between `&` and `;`. */
 const STRIP_HTML_ENTITY_TEXT: Readonly<Record<string, string>> = {
   amp: "&",
   lt: "<",
   gt: ">",
   quot: '"',
   "#39": "'",
   nbsp: " ",
 };
 /**
  * Converts an HTML fragment to plain text.
  *
  * Entities are decoded before tags are removed, so markup that was stored
  * entity-encoded (e.g. `&lt;p&gt;text&lt;/p&gt;`) is stripped as well.
  * `<br>` and closing `</p>`, `</div>`, `</li>` tags become newlines; runs of
  * three or more newlines collapse to two and the result is trimmed.
  *
  * @param html - HTML fragment, possibly containing entity-encoded tags.
  * @returns The plain-text content of the fragment.
  */
 export function stripHtml(html: string): string {
   // Decode every entity in ONE pass. Chained replaces that handle `&amp;`
   // first decode twice: `&amp;lt;` -> `&lt;` -> `<`, which turns escaped
   // literal text into markup that is then stripped.
   const decoded = html.replace(
     /&(amp|lt|gt|quot|#39|nbsp);/g,
     (entity: string, name: string) => STRIP_HTML_ENTITY_TEXT[name] ?? entity,
   );
   // Block-level boundaries become newlines so paragraphs and list items stay separated.
   const withBreaks = decoded
     .replace(/<br\s*\/?>/gi, "\n")
     .replace(/<\/(?:p|div|li)>/gi, "\n");
   // Only treat `<` as a tag opener when it is immediately followed by a letter,
   // `/`, `!` or `?`, so decoded comparison text such as "1 < 2 and 3 > 2"
   // is not mistaken for a tag and deleted.
   const text = withBreaks.replace(/<[a-z\/!?][^>]*>/gi, "");
   return text.replace(/\n{3,}/g, "\n\n").trim();
 }