author | Matthew Wild <mwild1@gmail.com> |
Wed, 13 Jul 2022 11:14:04 +0100 | |
changeset 4999 | cb3de818ff55 |
parent 4259 | 38da10e4b593 |
permissions | -rw-r--r-- |
4258
a4e182d7ff0a
mod_ogp: Improve parsing patterns
Seve Ferrer <seve@delape.net>
parents:
diff
changeset
|
1 |
-- Sample <meta> tags exercising the attribute spellings the extraction below
-- has to cope with: double-quoted, single-quoted and unquoted values, either
-- attribute order, optional whitespace after "=", an unrelated attribute in
-- between, and both ">" and "/>" tag endings.
local html = [[
<meta property="og:title" content="Example 1 A">
<meta property=og:title content="Example 2 B">
<meta property="og:title" content="Example 3 C" >
<meta property="og:title" content="Example 4 D" />
<meta property="og:title" content="Example 5 E"/>
<meta property=og:title content=Example 6 F/>
<meta property="og:title" content= "Example 7 G" />
<meta property="og:title" itemprop="image primaryImageOfPage" content="Example 8 H" />
<meta property='og:title' content='Example 9 I' />
<meta content="Example 10 J" property="og:title" >
<meta content="Example 11 K" property="og:title">
<meta content="Example 12 L" property="og:title"/>
<meta content="Example 13 M" property="og:title" />
<meta content="Example 14 N" property=og:title >
<meta content=Example 15 O property=og:title >
<meta content= "Example 16 P" property="og:title" />
<meta content="Example 17 Q" itemprop="image primaryImageOfPage" property="og:title" />
<meta content= 'Example 18 R' property='og:title' />
]]

--- Extract the value of one attribute from the attribute text of a tag.
-- Quoted values must be closed by the same quote character that opened them,
-- so a value may contain the other kind of quote. Unquoted values may contain
-- spaces (as in the samples above) and run until the next `name=` or the end
-- of the text; surrounding whitespace is dropped.
-- @tparam string attrs text between "<meta " and the closing ">" / "/>"
-- @tparam string name attribute name to look for
-- @treturn ?string the attribute value, or nil when the attribute is absent
local function get_attribute(attrs, name)
	-- Escape punctuation so the name is matched literally, and use a frontier
	-- so that e.g. "content" does not match inside "data-content".
	local prefix = "%f[%w_:%-]" .. (name:gsub("%p", "%%%0")) .. "%s*=%s*"

	-- Quoted value: %1 refers back to whichever quote opened the value.
	local quote, value = attrs:match(prefix .. "([\"'])(.-)%1")
	if quote then
		return value
	end

	-- Unquoted value followed by another attribute.
	value = attrs:match(prefix .. "(.-)%s+[%w_:%-]+%s*=")
	if value then
		return value
	end

	-- Unquoted value that is the last thing in the tag.
	return attrs:match(prefix .. "(.-)%s*$")
end

--- Pull the Open Graph property name and its content out of a meta tag.
-- @tparam string attrs attribute text of a single <meta> tag
-- @treturn ?string property name, only when it starts with "og:"
-- @treturn ?string content value, or nil when there is no content attribute
local function parse_meta(attrs)
	local property = get_attribute(attrs, "property")
	if property and not property:find("^og:") then
		-- Only Open Graph properties are of interest here.
		property = nil
	end
	return property, get_attribute(attrs, "content")
end

-- Captures everything between "<meta" and the closing ">" or "/>"; the "/" of
-- a self-closing tag is left out of the capture.
local meta_pattern = [[<meta%s+(.-)/?>]]
for match in html:gmatch(meta_pattern) do
	local property, content = parse_meta(match)

	-- The trailing "|" makes any whitespace at the end of the capture visible.
	print(property, '\t', content, '\t', match .. "|")
end