Get only HTML <head> Element with a Script or Tool Get only HTML <head> Element with a Script or Tool curl curl

Get only HTML <head> Element with a Script or Tool


It is not possible to load only the data between the <head> tags because the server would have to parse the requested page before sending it.

A possible solution is to read the response a few bytes at a time until a </head> tag is found.

The following reads n bytes from the source and checks if the string </head> is included. If so, the bytes are converted to string and trimmed such that the result contains the tags <head> and </head> as well as the data between them. Otherwise it continues to read n bytes until </head> is found.

import re
import urllib.request


def get_head_tag_data(url, n=512):
    """Return the ``<head> ... </head>`` section of the document at *url*.

    The resource is read ``n`` bytes at a time, and reading stops as soon as
    a closing ``</head>`` tag has been seen. The result is trimmed so that it
    starts at the opening ``<head`` tag and ends with ``</head>``.

    Args:
        url: Anything accepted by ``urllib.request.urlopen``.
        n: Number of bytes requested per read.

    Returns:
        The head element (tags included) as a decoded string. The charset
        announced in the response headers is used when present, otherwise
        UTF-8; undecodable bytes are replaced.

    Raises:
        AttributeError: If the stream ends before ``</head>`` is found, or
            if no opening ``<head`` tag precedes the closing tag.
    """
    closing_tag = re.compile(rb'</head>', re.IGNORECASE)
    # `[\s>]` accepts `<head>` and `<head attr=...>` but rejects `<header>`.
    opening_tag = re.compile(rb'<head[\s>]', re.IGNORECASE)
    # A closing tag may be split across two reads, so every search re-scans
    # the last few bytes of the previously accumulated data.
    overlap = len(b'</head>') - 1

    with urllib.request.urlopen(url) as site:
        data = bytearray()
        while True:
            buff = site.read(n)
            if not buff:
                # End of stream without ever seeing the closing tag.
                raise AttributeError('Not head-tag found.')
            search_from = max(0, len(data) - overlap)
            data += buff
            end_match = closing_tag.search(data, search_from)
            if end_match:
                break
        charset = site.headers.get_content_charset() or 'utf-8'
    print('{} bytes read'.format(len(data)))

    start_match = opening_tag.search(data, 0, end_match.start())
    if start_match is None:
        raise AttributeError('No opening <head> tag found.')

    # Decode the bytes instead of calling str() on them: str(bytes) returns
    # the "b'...'" repr, with newlines and non-ASCII characters escaped.
    head = bytes(data[start_match.start():end_match.end()])
    try:
        return head.decode(charset, errors='replace')
    except LookupError:
        # The server announced a charset Python does not know.
        return head.decode('utf-8', errors='replace')


if __name__ == '__main__':
    tag_data = get_head_tag_data('https://stackoverflow.com', n=256)

Note that this function does only minimal error checking: it raises an AttributeError if no </head> tag is found, but it does not handle other possible errors, such as network failures.


You can try BeautifulSoup. Here is an example Python script.

import urllib.request as urllib2

from bs4 import BeautifulSoup as bs

# Placeholder: replace with the page to fetch,
# e.g. https://stackoverflow.com/questions/48262523/get-only-html-head-element-with-a-script-or-tool
url = 'change_wiht_your_desired_url'

# Open the response in a `with` block so the connection is closed as soon
# as the document has been parsed, instead of being left open.
with urllib2.urlopen(url) as page:
    soup = bs(page, 'html.parser')

# The parsed <head> element. In an interactive session this line displays it;
# in a script, use print(soup.head) to see it.
soup.head

You can get different tags using the soup object. Read the full docs here. I hope this helps.

EDIT

print(soup.head) outputs

<head><title>Memory Leak with import_array() for numpy Python3.5 - Stack Overflow</title><link href="https://cdn.sstatic.net/Sites/stackoverflow/img/favicon.ico?v=4f32ecc8f43d" rel="shortcut icon"/><link href="https://cdn.sstatic.net/Sites/stackoverflow/img/apple-touch-icon.png?v=c78bd457575a" rel="apple-touch-icon image_src"/><link href="/opensearch.xml" rel="search" title="Stack Overflow" type="application/opensearchdescription+xml"/><meta content="website" property="og:type"/><meta content="https://stackoverflow.com/questions/48200892/memory-leak-with-import-array-for-numpy-python3-5" property="og:url"/><meta content="https://cdn.sstatic.net/Sites/stackoverflow/img/apple-touch-icon@2.png?v=73d79a89bded" itemprop="image primaryImageOfPage" property="og:image"/><meta content="summary" name="twitter:card"/><meta content="stackoverflow.com" name="twitter:domain"/><meta content="Memory Leak with import_array() for numpy Python3.5" itemprop="title name" name="twitter:title" property="og:title"/><meta content="Could someone suggest a fix for this problem?When I use import_array(), Valgrind reports memory leak of 157528 bytes.Here is the small piece of code to replicate the problem on Ubuntu16.04 and P..." 
itemprop="description" name="twitter:description" property="og:description"/><script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"></script><script src="https://cdn.sstatic.net/Js/stub.en.js?v=10cafd98c67a"></script><link href="https://cdn.sstatic.net/Sites/stackoverflow/all-primary.css?v=44b77c3d4e1c" rel="stylesheet" type="text/css"/><link href="/feeds/question/48200892" rel="alternate" title="Feed for question 'Memory Leak with import_array() for numpy Python3.5'" type="application/atom+xml"/><meta content="US" name="twitter:app:country"/><meta content="Stack Exchange iOS" name="twitter:app:name:iphone"/><meta content="871299723" name="twitter:app:id:iphone"/><meta content="se-zaphod://stackoverflow.com/questions/48200892/memory-leak-with-import-array-for-numpy-python3-5" name="twitter:app:url:iphone"/><meta content="Stack Exchange iOS" name="twitter:app:name:ipad"/><meta content="871299723" name="twitter:app:id:ipad"/><meta content="se-zaphod://stackoverflow.com/questions/48200892/memory-leak-with-import-array-for-numpy-python3-5" name="twitter:app:url:ipad"/><meta content="Stack Exchange Android" name="twitter:app:name:googleplay"/><meta content="http://stackoverflow.com/questions/48200892/memory-leak-with-import-array-for-numpy-python3-5" name="twitter:app:url:googleplay"/><meta content="com.stackexchange.marvin" name="twitter:app:id:googleplay"/><script>            StackExchange.ready(function () {                    StackExchange.using("snippets", function () {                        StackExchange.snippets.initSnippetRenderer();                    });                StackExchange.using("postValidation", function () {                    StackExchange.postValidation.initOnBlurAndSubmit($('#post-form'), 2, 'answer');                });                StackExchange.question.init({showAnswerHelp:true,totalCommentCount:2,shownCommentCount:2,highlightColor:'#F4A83D',backgroundColor:'#FFF',questionId:48200892});                styleCode(); 
                   StackExchange.realtime.subscribeToQuestion('1', '48200892');                                                                    StackExchange.using("gps", function () { StackExchange.gps.trackOutboundClicks('#content', '.post-text'); });            });        </script><script>        StackExchange.init({"locale":"en","serverTime":1516020520,"routeName":"Questions/Show","stackAuthUrl":"https://stackauth.com","networkMetaHostname":"meta.stackexchange.com","site":{"name":"Stack Overflow","description":"Q&A for professional and enthusiast programmers","isNoticesTabEnabled":true,"recaptchaPublicKey":"6LdchgIAAAAAAJwGpIzRQSOFaO0pU6s44Xt8aTwc","recaptchaAudioLang":"en","enableNewTagCreationWarning":true,"insertSpaceAfterNameTabCompletion":false,"id":1,"childUrl":"https://meta.stackoverflow.com","enableSocialMediaInSharePopup":true,"protocol":"https"},"user":{"fkey":"b01c42fe577b568489e86923067aebbd","tid":"5b158809-5378-6383-c1bd-bdf22c97f475","rep":0,"isAnonymous":true,"isAnonymousNetworkWide":true,"canSeeNewHeaderDesign":true},"events":{"postType":{"question":1},"postEditionSection":{"title":1,"body":2,"tags":3}},"story":{"minCompleteBodyLength":75,"likedTagsMaxLength":300,"dislikedTagsMaxLength":300},"jobPreferences":{"maxNumDeveloperRoles":2,"maxNumIndustries":4}}, {"site":{"allowImageUploads":true,"enableUserHovercards":true,"styleCode":true,"enableImgurHttps":true,"forceHttpsImages":true},"comments":{},"userProfile":{"openGraphAPIKey":"58740831ad23540e00c58987"},"tags":{},"accounts":{"currentPasswordRequiredForChangingStackIdPassword":true},"flags":{"allowRetractingFlags":true},"topBar":{"showNewFeatureNotice":true},"snippets":{"snippetsEnabled":true,"renderDomain":"stacksnippets.net"},"paths":{},"markdown":{"asteriskIntraWordEmphasis":true},"monitoring":{"clientTimingsAbsoluteTimeout":30000,"clientTimingsDebounceTimeout":1000}});        
StackExchange.using.setCacheBreakers({"js/prettify-full.en.js":"653f3a9edf23","js/moderator.en.js":"22b640565fb8","js/full-anon.en.js":"448b407c0535","js/full.en.js":"b5454c77884f","js/wmd.en.js":"70a0e707c944","js/third-party/jquery.autocomplete.min.js":"d3b8fa7fdf74","js/third-party/jquery.autocomplete.min.en.js":"","js/mobile.en.js":"8e20e188854d","js/help.en.js":"890f7bf1827b","js/tageditor.en.js":"68e773dc21b3","js/tageditornew.en.js":"c77ac7fa331f","js/inline-tag-editing.en.js":"681a5e3ebd00","js/revisions.en.js":"2faaeaae2529","js/review.en.js":"1cbc9c06f708","js/tagsuggestions.en.js":"b278f9a0b23b","js/post-validation.en.js":"d8d9b527c3ea","js/explore-qlist.en.js":"88f824a42b1a","js/events.en.js":"9c2e85f6190f","js/keyboard-shortcuts.en.js":"a6f8e6251bbd","js/external-editor.en.js":"7a4d6f43f0bf","js/adops.en.js":"22a9bd59b1e9","js/external-editor.en.js":"7a4d6f43f0bf","js/snippet-javascript.en.js":"a3fb7827a7b4","js/snippet-javascript-codemirror.en.js":"72e55eacc0ed"});        StackExchange.using("gps", function() {             StackExchange.gps.init(true);        });    </script><noscript id="noscript-css"><style>body,.top-bar{margin-top:1.9em}</style></noscript></head>