It seems that contains(text(), 'TARGET_STRING')
works fine with html_nodes
as well as the "or operator": "|", but it does not work together.
Reproducible example:
html <- "<a>a</a><p>abc</p>"
xp <- "//*[self::a|self::b]" # or operator works
xp2 <- "//*[contains(text(),'abc')]" # contains text works
# but it doesnt work together
xp3 <- "//*[self::a|contains(text(),'abc')]"
html_nodes(x = read_html(html), xpath = xp)
html_nodes(x = read_html(html), xpath = xp2)
# this one fails
html_nodes(x = read_html(html), xpath = xp3)