Scraping of protected email
From the CF tag in the HTML you supplied, I assume you are scraping a site served through Cloudflare. Cloudflare offers a feature that obfuscates the email addresses listed on a page (see here): the addresses are stored encoded in the HTML and are decoded in the browser by JavaScript. Hence, with Selenium (which runs the JavaScript) you will see the email addresses, but with requests (which does not) you won't.
Since the decryption method can be easily taken from the JavaScript, you can write your own decryption method in Python.
In JavaScript,
// Cloudflare's inline decoder: replaces the element with id "__cf_email__"
// by a text node holding the decoded address. The element's class name is a
// hex string whose first byte is the XOR key for all following bytes.
(function () {
    try {
        var node = document.getElementById("__cf_email__");
        var hex = node.className;
        if (hex) {
            var key = parseInt(hex.substr(0, 2), 16);
            var text = '';
            var pos = 2;
            // Loop until pos reaches the end of the hex string.
            while (hex.length - pos) {
                var code = parseInt(hex.substr(pos, 2), 16) ^ key;
                text += String.fromCharCode(code);
                pos += 2;
            }
            var replacement = document.createTextNode(text);
            node.parentNode.replaceChild(replacement, node);
        }
    } catch (e) {}
})();
In Python,
def decodeEmail(e):
    """Decode a Cloudflare-obfuscated email address.

    Args:
        e: Hex string. The first two hex digits are the XOR key; every
            following pair of hex digits is one encoded character.

    Returns:
        The decoded string. A trailing unpaired hex digit is ignored.

    Raises:
        ValueError: If the key or any pair is not valid hexadecimal
            (this includes an empty string, which has no key).
    """
    k = int(e[:2], 16)
    # len(e) - 1 as the stop value skips a dangling half-byte at the end.
    return "".join(chr(int(e[i:i + 2], 16) ^ k) for i in range(2, len(e) - 1, 2))
Here is the same decoder in several languages:
Javascript
/**
 * Decode a Cloudflare-obfuscated email address.
 *
 * The first two hex digits of the input are the XOR key; every following
 * pair of hex digits is one encoded character.
 *
 * @param {string} encodedString Hex string to decode.
 * @returns {string} The decoded text. Returns "" when there is nothing after
 *     the key; a trailing unpaired hex digit is ignored.
 */
function cfDecodeEmail(encodedString) {
    var key = parseInt(encodedString.slice(0, 2), 16);
    var email = "";
    // Bounded comparison: the previous `length - n` truthiness test never
    // became 0 for empty or odd-length input and looped forever.
    for (var n = 2; n + 1 < encodedString.length; n += 2) {
        email += String.fromCharCode(parseInt(encodedString.slice(n, n + 2), 16) ^ key);
    }
    return email;
}

console.log(cfDecodeEmail("543931142127353935313e352e7a373b39")); // usage
Python
def cfDecodeEmail(encodedString):
    """Decode a Cloudflare-obfuscated email address.

    Args:
        encodedString: Hex string. The first two hex digits are the XOR key;
            the remaining digits are read two at a time as encoded characters.

    Returns:
        The decoded string.

    Raises:
        ValueError: If the key or any chunk is not valid hexadecimal
            (this includes an empty string, which has no key).
    """
    r = int(encodedString[:2], 16)
    email = ''.join(
        chr(int(encodedString[i:i + 2], 16) ^ r)
        for i in range(2, len(encodedString), 2)
    )
    return email


# Function-call form of print works on both Python 2 and Python 3.
print(cfDecodeEmail('543931142127353935313e352e7a373b39'))  # usage
PHP
/**
 * Decode a Cloudflare-obfuscated email address.
 *
 * The first two hex digits are the XOR key; each following pair of hex
 * digits is one encoded character. A trailing unpaired digit is skipped.
 *
 * @param string $encodedString Hex string to decode.
 * @return string The decoded text.
 */
function cfDecodeEmail($encodedString){
    $key = hexdec(substr($encodedString, 0, 2));
    $limit = strlen($encodedString) - 1;
    $email = '';
    $pos = 2;
    while ($pos < $limit) {
        $byte = hexdec(substr($encodedString, $pos, 2));
        $email .= chr($byte ^ $key);
        $pos += 2;
    }
    return $email;
}

echo cfDecodeEmail('543931142127353935313e352e7a373b39'); // usage
GO
package main

import (
	"bytes"
	"strconv"
)

// cf decodes a Cloudflare-obfuscated email address.
//
// The first two hex digits of a are the XOR key; every following pair of hex
// digits is one encoded byte. A trailing unpaired digit is ignored. The empty
// string is returned when a is too short to hold a key or contains anything
// that is not valid hexadecimal.
func cf(a string) (s string) {
	if len(a) < 2 {
		// No key to read; slicing a[0:2] would panic.
		return ""
	}
	key, err := strconv.ParseUint(a[0:2], 16, 8)
	if err != nil {
		return ""
	}
	var out bytes.Buffer
	// n+2 <= len(a) keeps every slice in range, including for odd lengths.
	for n := 2; n+2 <= len(a); n += 2 {
		b, err := strconv.ParseUint(a[n:n+2], 16, 8)
		if err != nil {
			return ""
		}
		// Write the raw byte; converting an integer with string() would
		// UTF-8 encode it as a rune instead.
		out.WriteByte(byte(b ^ key))
	}
	return out.String()
}

func main() {
	email := cf("543931142127353935313e352e7a373b39") // usage
	print(email)
	print("\n")
}
C++
#include <iostream>#include <string>using namespace std;string cfDecodeEmail(string encodedString);int main(){ cout << cfDecodeEmail("543931142127353935313e352e7a373b39") << endl;}string cfDecodeEmail(string encodedString){ string email; char xorKey = stoi( encodedString.substr(0, 2), nullptr, 16); for( unsigned i = 2; i < encodedString.length(); i += 2) email += stoi( encodedString.substr(i, 2), nullptr, 16) ^ xorKey; return email;}
C#
using System;
using System.Text;

public class Program
{
    /// <summary>
    /// Decodes a Cloudflare-obfuscated email address.
    /// The first two hex digits are the XOR key; every following pair of hex
    /// digits is one encoded character. A trailing unpaired digit is ignored.
    /// </summary>
    /// <param name="encodedString">Hex string to decode.</param>
    /// <returns>The decoded text.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The input is shorter than the two-digit key.
    /// </exception>
    /// <exception cref="FormatException">
    /// The key or a pair is not valid hexadecimal.
    /// </exception>
    public static string cfDecodeEmail(string encodedString)
    {
        int key = Convert.ToInt32(encodedString.Substring(0, 2), 16);
        var email = new StringBuilder(encodedString.Length / 2);
        // n + 1 < Length guarantees Substring(n, 2) stays inside the string.
        for (int n = 2; n + 1 < encodedString.Length; n += 2)
        {
            int code = Convert.ToInt32(encodedString.Substring(n, 2), 16) ^ key;
            email.Append((char)code);
        }
        return email.ToString();
    }

    public static void Main(string[] args)
    {
        Console.WriteLine(cfDecodeEmail("543931142127353935313e352e7a373b39")); // usage
    }
}
Following the algorithm above, I wrote this Ruby code to decode the [protected email] values when parsing with Nokogiri:
# Decode a Cloudflare-obfuscated email address.
#
# The first two hex digits of +e+ are the XOR key; every following pair of
# hex digits is one encoded character. A trailing unpaired digit is ignored.
#
# Raises ArgumentError (from Integer()) when the key or a pair is not valid
# hexadecimal, which includes an empty string.
def decode_email(e)
  r = Integer(e[0, 2], 16)
  # Stop at length - 2 so every offset has a full two-digit pair after it.
  (2..e.length - 2).step(2).map do |j|
    c = Integer(e[j, 2], 16) ^ r
    c.chr
  end.join('')
end