mod_ogp/test.lua
author Matthew Wild <mwild1@gmail.com>
Sat, 24 Sep 2022 09:25:46 +0100
changeset 5062 39c2824c2880
parent 4259 38da10e4b593
permissions -rw-r--r--
mod_cloud_notify: README overhaul
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
4258
a4e182d7ff0a mod_ogp: Improve parsing patterns
Seve Ferrer <seve@delape.net>
parents:
diff changeset
     1
local html = [[
4259
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
     2
<meta property="og:title" content="Example 1 A">
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
     3
<meta property=og:title content="Example 2 B">
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
     4
<meta property="og:title" content="Example 3 C" >
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
     5
<meta property="og:title" content="Example 4 D" />
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
     6
<meta property="og:title" content="Example 5 E"/>
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
     7
<meta property=og:title content=Example 6 F/>
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
     8
<meta property="og:title" content= "Example 7 G" />
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
     9
<meta property="og:title" itemprop="image primaryImageOfPage" content="Example 8 H" />
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    10
<meta property='og:title' content='Example 9 I' />
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    11
<meta content="Example 10 J" property="og:title" >
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    12
<meta content="Example 11 K" property="og:title">
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    13
<meta content="Example 12 L" property="og:title"/>
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    14
<meta content="Example 13 M" property="og:title" />
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    15
<meta content="Example 14 N" property=og:title >
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    16
<meta content=Example 15 O property=og:title >
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    17
<meta content= "Example 16 P" property="og:title" />
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    18
<meta content="Example 17 Q" itemprop="image primaryImageOfPage"  property="og:title" />
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    19
<meta content= 'Example 18 R' property='og:title' />
4258
a4e182d7ff0a mod_ogp: Improve parsing patterns
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    20
]]
a4e182d7ff0a mod_ogp: Improve parsing patterns
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    21
a4e182d7ff0a mod_ogp: Improve parsing patterns
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    22
4259
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    23
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    24
local meta_pattern = [[<meta (.-)/?>]]
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    25
for match in html:gmatch(meta_pattern) do
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    26
    local property = match:match([[property=%s*["']?(og:.-)["']?%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    27
    if not property then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    28
        property = match:match([[property=["']?(og:.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    29
    end
4258
a4e182d7ff0a mod_ogp: Improve parsing patterns
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    30
4259
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    31
    local content = match:match([[content=%s*["'](.-)["']%s]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    32
    if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    33
        content = match:match([[content=["']?(.-)["']$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    34
    end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    35
    if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    36
        content = match:match([[content=(.-) property]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    37
    end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    38
    if not content then
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    39
        content = match:match([[content=(.-)$]])
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    40
    end
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    41
38da10e4b593 mod_ogp: Update parsing logic to catch more cases
Seve Ferrer <seve@delape.net>
parents: 4258
diff changeset
    42
    print(property, '\t', content, '\t', match .. "|")
4258
a4e182d7ff0a mod_ogp: Improve parsing patterns
Seve Ferrer <seve@delape.net>
parents:
diff changeset
    43
end