Python BeautifulSoup with Authenticated Scraping

BeautifulSoup is a great tool for scraping a website and gathering content from the web. We can scrape content from a static website without hassle, but sites like GitLab, GitHub, or Facebook are not that easy to scrape. There we are blocked by a login screen, so we cannot continue to scrape the content.

Here we need to log in with user credentials; only then can we scrape the content. In this blog we will see how to solve this problem with BS4 and the requests module.

import requests
import getpass
import datetime
from datetime import timedelta

from lxml import html


# Base address of the GitLab instance to scrape (replace with the real host)
# and the URL of its sign-in form, which the script authenticates against.
BASE_URL = "https://gitlab_url"
LOGIN_URL = BASE_URL + "/users/sign_in"

def main(*args):
    """Authenticate against a GitLab instance and print commit counts.

    Prompts for the login e-mail, username, and password on stdin, scrapes
    the CSRF ``authenticity_token`` from the sign-in page, posts the login
    form, then fetches ``/users/<user>/calendar.json`` with the authenticated
    session and prints yesterday's, today's, and the total commit counts.
    """
    session_requests = requests.session()
    # Fix: the original read the e-mail with no prompt, then printed
    # "your username" before the second input().
    print("your email")
    email = input()
    print("your username")
    user_id = input()
    password = getpass.getpass()
    calendar_url = BASE_URL + "/users/" + user_id + "/calendar.json"
    # Build ISO-formatted date keys matching calendar.json's key format.
    today = datetime.date.today()
    yesterday = today - timedelta(days=1)
    today_key = today.strftime("%Y-%m-%d")
    yesterday_key = yesterday.strftime("%Y-%m-%d")
    # NOTE(review): verify=False disables TLS certificate checks -- only
    # acceptable for an internal instance with a self-signed certificate.
    result = session_requests.get(LOGIN_URL, verify=False)
    tree = html.fromstring(result.text)
    # GitLab's sign-in form embeds a CSRF token that must accompany the POST.
    authenticity_token = list(set(
        tree.xpath('//*[@name="authenticity_token"]//@value')))[0]
    payload = {
        "user[login]": email,
        "user[password]": password,
        "authenticity_token": authenticity_token,
    }
    # Log in; the session cookie jar now carries the authenticated session.
    session_requests.post(LOGIN_URL, data=payload,
                          headers=dict(referer=LOGIN_URL), verify=False)
    calender = session_requests.get(calendar_url,
                                    headers=dict(referer=calendar_url),
                                    verify=False).json()
    print("Yesterday commits --> ", calender.get(yesterday_key, "No Records :-/"))
    print("Today commits --> ", calender.get(today_key, "No Records :-/"))
    print("Total commits --> ", sum(calender.values()))

if __name__ == '__main__':
    # Fix: the guard had no body, which is a SyntaxError; invoke the script.
    main()