|
1 package html2text |
|
2 |
|
3 import ( |
|
4 "github.com/stretchr/testify/assert" |
|
5 "testing" |
|
6 ) |
|
7 |
|
8 func TestTextify(t *testing.T) { |
|
9 expected := "body\nbody2" |
|
10 r, e := Textify("<html><body><b>body</b><br/>body2</body></html>") |
|
11 assert.Nil(t, e) |
|
12 assert.Equal(t, expected, r) |
|
13 } |
|
14 |
|
15 func TestTextifyDiv(t *testing.T) { |
|
16 expected := "first\nsecond" |
|
17 r, e := Textify("<div>first</div>second") |
|
18 assert.Nil(t, e) |
|
19 assert.Equal(t, expected, r) |
|
20 } |
|
21 |
|
22 /* |
|
23 func TestTextifyLink(t *testing.T) { |
|
24 expected := "somelink (link: someurl)" |
|
25 r, e := Textify("<a href=\"someurl\">somelink</a>") |
|
26 assert.Nil(t, e) |
|
27 assert.Equal(t, expected, r) |
|
28 } |
|
29 */ |
|
30 |
|
31 func TestTextifyDontDuplicateLink(t *testing.T) { |
|
32 expected := "www.awesome.com" |
|
33 r, e := Textify("<a href=\"www.awesome.com\">www.awesome.com</a>") |
|
34 assert.Nil(t, e) |
|
35 assert.Equal(t, expected, r) |
|
36 } |
|
37 |
|
38 func TestTextifySpaces(t *testing.T) { |
|
39 expected := "hello" |
|
40 r, e := Textify("<div> hello </div>") |
|
41 assert.Nil(t, e) |
|
42 assert.Equal(t, expected, r) |
|
43 } |
|
44 |
|
/* Disabled: joining adjacent spans with a single space is probably not what we want for Mastodon content.
|
46 func TestTextifySpacesMultiple(t *testing.T) { |
|
47 expected := "hello goodbye" |
|
48 r, e := Textify("<span> hello </span><span> goodbye </span>") |
|
49 assert.Nil(t, e) |
|
50 assert.Equal(t, expected, r) |
|
51 } |
|
52 */ |
|
53 |
|
54 func TestTextifyNonBreakingSpace(t *testing.T) { |
|
55 expected := "a a" |
|
56 r, e := Textify("a a") |
|
57 assert.Equal(t, expected, r) |
|
58 assert.Nil(t, e) |
|
59 } |
|
60 |
|
61 func TestTextifyLimitedNewLines(t *testing.T) { |
|
62 expected := "abc\nxyz" |
|
63 r, e := Textify("abc <br/> <br/> <br/> <br/>xyz") |
|
64 assert.Nil(t, e) |
|
65 assert.Equal(t, expected, r) |
|
66 } |
|
67 |
|
68 func TestTextifyTable(t *testing.T) { |
|
69 expected := `Join by phone |
|
70 1-877-668-4490 Call-in toll-free number (US/Canada) |
|
71 1-408-792-6300 Call-in toll number (US/Canada) |
|
72 Access code: 111 111 111 |
|
73 https://akqa.webex.com/akqa/globalcallin.php?serviceType=MC&ED=299778282&tollFree=1 | http://www.webex.com/pdf/tollfree_restrictions.pdf` |
|
74 |
|
75 test := `<table width="747" style="width:448.2pt;"> <col width="747" style="width:448.2pt;"> <tbody> <tr> <td><font face="Arial" color="#666666"><b>Join by phone</b></font></td> </tr> <tr> <td><font face="Arial" size="3" color="#666666"><span style="font-size:11.5pt;"><b>1-877-668-4490</b> Call-in toll-free number (US/Canada)</span></font></td> </tr> <tr> <td><font face="Arial" size="3" color="#666666"><span style="font-size:11.5pt;"><b>1-408-792-6300</b> Call-in toll number (US/Canada)</span></font></td> </tr> <tr> <td><font face="Arial" size="3" color="#666666"><span style="font-size:11.5pt;">Access code: 111 111 111</span></font></td> </tr> <tr> <td><a href="https://akqa.webex.com/akqa/globalcallin.php?serviceType=MC&ED=299778282&tollFree=1"><font face="Arial" size="2" color="#00AFF9"><span style="font-size:10pt;"><u>Global call-in numbers</u></span></font></a><font face="Arial" size="3" color="#666666"><span style="font-size:11.5pt;"> | </span></font><a href="http://www.webex.com/pdf/tollfree_restrictions.pdf"><font face="Arial" size="2" color="#00AFF9"><span style="font-size:10pt;"><u>Toll-free calling restrictions</u></span></font></a></td> </tr> </tbody> </table>` |
|
76 |
|
77 r, e := Textify(test) |
|
78 assert.Nil(t, e) |
|
79 assert.Equal(t, expected, r) |
|
80 } |
|
81 |
|
82 func TestTextifyComment(t *testing.T) { |
|
83 expected := "this should appear" |
|
84 r, e := Textify("<!-- this should not appear -->this should appear") |
|
85 assert.Nil(t, e) |
|
86 assert.Equal(t, expected, r) |
|
87 } |
|
88 |
|
89 func TestTextifyCommentInHead(t *testing.T) { |
|
90 expected := "qwerty" |
|
91 |
|
92 body := `<html> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> <meta name="Generator" content="Microsoft Exchange Server"> <!-- converted from rtf --><style><!-- .EmailQuote { margin-left: 1pt; padding-left: 4pt; border-left: #800000 2px solid; } --></style> </head> <body>qwerty</body> </html>` |
|
93 |
|
94 r, e := Textify(body) |
|
95 assert.Nil(t, e) |
|
96 assert.Equal(t, expected, r) |
|
97 } |
|
98 |
|
99 func TestTextifyLists(t *testing.T) { |
|
100 expected := "a\nb\n1\n2" |
|
101 |
|
102 body := `<ol><li>a</li><li>b</li></ol><ul><li>1</li><li>2</li></ul>` |
|
103 |
|
104 r, e := Textify(body) |
|
105 assert.Nil(t, e) |
|
106 assert.Equal(t, expected, r) |
|
107 } |
|
108 |
|
109 func TestTextifyMastodonSample1(t *testing.T) { |
|
110 expected := "@magi hello \\U0001F607 @TEST" |
|
111 |
|
112 body := `<p><span class=\"h-card\"><a href=\"https://example.com/@magi\">@<span>magi</span></a></span> hello \U0001F607 <span class=\"h-card\"><a href=\"https://example.com/@TEST\">@<span>TEST</span></a></span></p>` |
|
113 |
|
114 r, e := Textify(body) |
|
115 assert.Nil(t, e) |
|
116 assert.Equal(t, expected, r) |
|
117 } |
|
118 |
|
119 func TestTextifyMastodonSample2(t *testing.T) { |
|
120 expected := "@cadey It looks good at first glance\n\"case <-stop\" Actually you don't listen to stop channel, you close it if you want to stop the listener." |
|
121 |
|
122 body := `<p><span class="h-card"><a href="https://www.example.com/@cadey" class="u-url mention">@<span>cadey</span></a></span> It looks good at first glance</p><p>"case <-stop" Actually you don't listen to stop channel, you close it if you want to stop the listener.</p>` |
|
123 |
|
124 r, e := Textify(body) |
|
125 assert.Nil(t, e) |
|
126 assert.Equal(t, expected, r) |
|
127 } |
|
128 |
|
129 func TestTextifyMastodonSample3(t *testing.T) { |
|
130 expected := "From timeline: Materials research creates potential for improved computer chips and transistors #phys #physics ..." |
|
131 |
|
132 body := `From timeline: Materials research creates potential for improved computer chips and transistors #<span class="tag"><a href="https://social.oalm.gub.uy/tag/phys">phys</a></span> #<span class="tag"><a href="https://social.oalm.gub.uy/tag/physics">physics</a></span><p>...</p>` |
|
133 |
|
134 r, e := Textify(body) |
|
135 assert.Nil(t, e) |
|
136 assert.Equal(t, expected, r) |
|
137 } |
|
138 |
|
139 func TestTextifyMastodonSample4(t *testing.T) { |
|
140 expected := "Vous reprendrez bien un peu de #Tolkein ?\n#Arte +7 propose un ensemble de 6 vidéos en plus du documentaire:\nhttp://www.arte.tv/fr/videos/RC-014610/tolkien/" |
|
141 |
|
142 body := `<p>Vous reprendrez bien un peu de <a href="https://framapiaf.org/tags/tolkein">#<span>Tolkein</span></a> ?<br><a href="https://framapiaf.org/tags/arte">#<span>Arte</span></a>+7 propose un ensemble de 6 vidéos en plus du documentaire:</p><p><a href="http://www.arte.tv/fr/videos/RC-014610/tolkien/"><span class="invisible">http://www.</span><span class="ellipsis">arte.tv/fr/videos/RC-014610/to</span><span class="invisible">lkien/</span></a></p>` |
|
143 |
|
144 r, e := Textify(body) |
|
145 assert.Nil(t, e) |
|
146 assert.Equal(t, expected, r) |
|
147 } |
|
148 |
|
149 func TestTextifyMastodonMentionAndTag(t *testing.T) { |
|
150 expected := "@ACh Mais heu ! Moi aussi je fais du #TootRadio de gens morts il y a 5 siècles. Gesulado, Charpentier, Mireille Mathieu..." |
|
151 |
|
152 body := `<p><span class="h-card"><a href="https://mamot.fr/@ACh">@<span>ACh</span></a></span> Mais heu ! Moi aussi je fais du <a href="https://example.com/tags/tootradio">#<span>TootRadio</span></a> de gens morts il y a 5 siècles. Gesulado, Charpentier, Mireille Mathieu...</p>` |
|
153 |
|
154 r, e := Textify(body) |
|
155 assert.Nil(t, e) |
|
156 assert.Equal(t, expected, r) |
|
157 } |
|
158 |
|
159 func TestTextifyMastodonLinkSpacing(t *testing.T) { |
|
160 expected := "\"Twitter\" https://twitter.com/holly/status/123456789012345678" |
|
161 |
|
162 body := `<p>"Twitter" <a href="https://twitter.com/holly/status/123456789012345678"><span class="invisible">https://</span><span class="ellipsis">twitter.com/holly/status/86266</span><span class="invisible">1234567890123</span></a></p>` |
|
163 |
|
164 r, e := Textify(body) |
|
165 assert.Nil(t, e) |
|
166 assert.Equal(t, expected, r) |
|
167 } |
|
168 |
|
169 func TestTextifyMastodonMentionGNUSocial(t *testing.T) { |
|
170 expected := "@username Hello." |
|
171 |
|
172 body := `@<a href="https://example.com/user/12345">username</a> Hello.` |
|
173 |
|
174 r, e := Textify(body) |
|
175 assert.Nil(t, e) |
|
176 assert.Equal(t, expected, r) |
|
177 } |