From dea4a61cbf42ff422f1409d4342fa59017950526 Mon Sep 17 00:00:00 2001 From: Roger Gonzalez Date: Mon, 2 Nov 2020 18:37:23 -0300 Subject: Changed theme to Archie, moving blog to rogs.me --- content/_index.md | 121 +++++++++ content/posts.org | 208 ---------------- ...cy-appointment-thanks-to-python-and-selenium.md | 2 +- ...ow-to-scrape-your-auth0-database-with-django.md | 270 +++++++++++++++++++++ 4 files changed, 392 insertions(+), 209 deletions(-) create mode 100644 content/_index.md delete mode 100644 content/posts.org create mode 100644 content/posts/how-to-scrape-your-auth0-database-with-django.md (limited to 'content') diff --git a/content/_index.md b/content/_index.md new file mode 100644 index 0000000..fdb9433 --- /dev/null +++ b/content/_index.md @@ -0,0 +1,121 @@ +--- +title: "Home" +date: 2020-11-01T20:11:50-03:00 +lastmod: 2020-11-01T20:11:50-03:00 +tags : [ "dev", "hugo" ] +draft: false +--- +# Who am I? + +Hello world! I'm a Full-Stack web developer from Valencia, Venezuela, but now +living in [Montevideo, Uruguay](https://www.openstreetmap.org/relation/2929054). + +I have experience in front-end, back-end, and DevOps. New technologies fuel my +desire to learn more and more each day. I try to keep my code as clean as +possible, following established standards for the technology I'm using at the +moment. + +I love Open Source projects. Knowledge should be available to everyone who wants +it. + +You can check my resume in a more traditional format here: +[English](https://cloud.rogs.me/s/7Gyc9KmC7pxcmmG)| +[Spanish](https://cloud.rogs.me/s/4zHoy4Nd3SNgNKQ). + +# Experience + +## [Lazer Technologies](https://lazertechnologies.com/) +> September 2020 + +In Lazer Technologies we are working for [Certn](https://certn.co/). Certn is an +app that looks to ease the employers jobs of doing criminal background checks +for their employees. 
First, we built an app that acts as a bridge between our +main app and criminal background check providers (like the +[RCMP](https://www.rcmp-grc.gc.ca/)). Now we are working on a scraper for +multiple providers that don't have an API. In this project we are using Django, +Django REST Framework, Docker, PostgreSQL, Github Actions and Jenkins. + +## [Tarmac](https://tarmac.io) +> July 2020 + +I'm currently working at Tarmac on a project called "Volition". In Volition we +are developing a crawler that extracts information from different pages in order +to build a "super market place" for a specific product. In this project we are +using Docker, TypeScript, NodeJS, PostgreSQL, Google Cloud, and Kubernetes. + +## [Massive](https://massive.ag) +Senior Backend Developer + +> April 2019 - August 2020 + +I worked for Massive from April 2019 to August 2020. During my time at Massive, I +worked on one big project for Coca-Cola Mexico, called Tus Tapas Valen. "Tus +Tapas Valen" is an application that allowed clients to participate in promotions +and auctions, and win prizes. First I worked on the FrontEnd with ReactJS, Redux and +redux-observable, and then I moved to the Backend. I had to plan a big +refactor while still working on new Backend functionalities. The Backend was +built using Python, Django, PostgreSQL, AWS, and AWS S3. + +## [Vascar Solutions](https://vascarsolutions.com/) +Backend Developer + +> December 2016 - June 2019 + +During my time at Vascar Solutions, I worked on many projects, most recently on +Knac and Axelerist. "Knac" is a job application startup, emphasizing +assessments to recommend candidates for a specific job. I worked on the backend +with NodeJS, Express, MongoDB, Mocha, CircleCI, Heroku and AWS. "Axelerist" is a +web app that connects to an external API and displays the client’s inventory in +a more friendly and ergonomic way. I worked on the backend, managing the API +connection, working on an API wrapper and user management. 
The app was made with +NodeJS, Express, MongoDB, Mocha, CircleCI, Heroku and AWS. + +# Education + +[**Universidad Tecnológica del Centro (UNITEC)**](https://portal.unitec.edu.ve/) + +> September 2010 - December 2015 + +Engineering + +- Information Engineer + + Internship: Database system for Amcor Rigid Plastics de Venezuela + + Specializations: **1.** Web development, **2.** Automation & **3.** Linux administration + + University groups: **1.** TecnoYucas, **2.** Centro de Investigación y + Tecnología (CIT), **3.** Centro de Tecnología y Robótica (CTR) + +# Skills +- **Programming Languages:** [Python](https://python.org/) | + [JavaScript](https://developer.mozilla.org/en-US/docs/Web/JavaScript) | + [Bash](https://www.gnu.org/software/bash/) +- **FrontEnd:** [HTML](https://html.spec.whatwg.org/multipage/) | + [CSS](https://developer.mozilla.org/en-US/docs/Web/CSS) | + [Bootstrap](https://getbootstrap.com/) | [ReactJS](https://reactjs.org/) +- **Backend:** [Django](https://www.djangoproject.com/) | [Django REST + Framework](https://www.django-rest-framework.org/) | + [NodeJS](https://nodejs.org/en/) | [Express](https://expressjs.com/) | + [Flask](https://flask.palletsprojects.com/en/1.1.x/) | + [MySQL](https://www.mysql.com/) | [PostgreSQL](https://www.postgresql.org/) | + [MongoDB](https://www.mongodb.com/) +- **Servers and Infrastructure:** [Amazon Web Services](https://aws.amazon.com/) + | [DigitalOcean](https://www.digitalocean.com/) | [Linode](https://linode.com) + | [Docker](https://www.docker.com/) | [Heroku](https://www.heroku.com/) | + [NGINX](https://nginx.org/) | [Apache](https://www.apache.org/) +- **Others:** [Linux](https://linux.org/) | + [Emacs](https://www.gnu.org/software/emacs/) | [Git](https://git-scm.com/) | + [Scrum](https://www.scrum.org/) | [CircleCI](https://circleci.com/) | + [Mocha](https://mochajs.org/) | [ESLint](https://eslint.org/) + +## Workflow +- REST API design +- Follow the ["Twelve factor app"](https://12factor.net/) +- 
Cross Functional Teams +- Agile Development & Scrum + +# Certifications +- English Course with practice & lesson sections, full English environment - +Berlitz English +- [Build a Backend REST API with Python & Django - Advanced - + Udemy](https://www.udemy.com/certificate/UC-A1CXJVDP/) +- [Build a Backend REST API with Python & Django - Beginner - + Udemy](https://www.udemy.com/certificate/UC-SNTLVIV0/) diff --git a/content/posts.org b/content/posts.org deleted file mode 100644 index 12b6eb8..0000000 --- a/content/posts.org +++ /dev/null @@ -1,208 +0,0 @@ -#+hugo_base_dir: ../ -#+hugo_section: ./posts - -#+hugo_weight: auto -#+hugo_auto_set_lastmod: t - -#+author: Roger Gonzalez - -* Programming :@programming: -All posts in here will have the category set to /programming/. -** How I got a residency appointment thanks to Python, Selenium and Telegram :python::selenium:telegram: -:PROPERTIES: -:EXPORT_FILE_NAME: how-i-got-a-residency-appointment-thanks-to-python-and-selenium -:EXPORT_DATE: 2020-08-02 -:END: -Hello everyone! - -As some of you might know, I'm a Venezuelan 🇻🇪 living in Montevideo, Uruguay 🇺🇾. -I've been living here for almost a year, but because of the pandemic my -residency appointments have slowed down to a crawl, and in the middle of the -quarantine they added a new appointment system. Before, there were no -appointments, you just had to get there early and wait for the secretary to -review your files and assign someone to attend you. But now, they had -implemented an appointment system that you could do from the comfort of your own -home/office. There was just one issue: *there were never appointments available*. - -That was a little stressful. I was developing a small /tick/ by checking the -site multiple times a day, with no luck. But then, I decided I wanted to do a -bot that checks the site for me, that way I could just forget about it and let -the computers do it for me. 
- -*** Tech -**** Selenium -I had some experience with Selenium in the past because I had to run automated -tests on an Android application, but I had never used it for the web. I knew it -supported Firefox and had an extensive API to interact with websites. In the -end, I just had to inspect the HTML and search for the "No appointments -available" error message. If the message wasn't there, I needed a way to be -notified so I can set my appointment as fast as possible. -**** Telegram Bot API -Telegram was my goto because I have a lot of experience with it. It has a -stupidly easy API that allows for superb bot management. I just needed the bot -to send me a message whenever the "No appointments available" message wasn't -found on the site. - -*** The plan -Here comes the juicy part: How is everything going to work together? - -I divided the work into four parts: -1) Inspecting the site -2) Finding the error message on the site -3) Sending the message if nothing was found -4) Deploy the job with a cronjob on my VPS - -*** Inspecting the site -Here is the site I needed to inspect: -- On the first site, I need to click the bottom button. By inspecting the HTML, - I found out that its name is ~form:botonElegirHora~ - [[/2020-08-02-171251.png]] -- When the button is clicked, it loads a second page that has an error message - if no appointments are found. The ID of that message is ~form:warnSinCupos~. - [[/2020-08-02-162205.png]] - -*** Using Selenium to find the error message -First, I needed to define the browser session and its settings. 
I wanted to run -it in headless mode so no X session is needed: -#+BEGIN_SRC python -from selenium import webdriver -from selenium.webdriver.firefox.options import Options - -options = Options() -options.headless = True -d = webdriver.Firefox(options=options) -#+END_SRC - -Then, I opened the site, looked for the button (~form:botonElegirHora~) and -clicked it -#+BEGIN_SRC python -# This is the website I wanted to scrape -d.get('https://sae.mec.gub.uy/sae/agendarReserva/Paso1.xhtml?e=9&a=7&r=13') -elem = d.find_element_by_name('form:botonElegirHora') -elem.click() -#+END_SRC - -And on the new page, I looked for the error message (~form:warnSinCupos~) -#+BEGIN_SRC python -try: - warning_message = d.find_element_by_id('form:warnSinCupos') -except Exception: - pass -#+END_SRC - -This was working exactly how I wanted: It opened a new browser session, opened -the site, clicked the button, and then looked for the message. For now, if the -message wasn't found, it does nothing. Now, the script needs to send me a -message if the warning message wasn't found on the page. - -*** Using Telegram to send a message if the warning message wasn't found -The Telegram bot API has a very simple way to send messages. If you want to read -more about their API, you can check it [[https://core.telegram.org/][here]]. - -There are a few steps you need to follow to get a Telegram bot: -1) First, you need to "talk" to the [[https://core.telegram.org/bots#6-botfather][Botfather]] to create the bot. -2) Then, you need to find your Telegram Chat ID. There are a few bots that can help - you with that, I personally use ~@get_id_bot~. -3) Once you have the ID, you should read the ~sendMessage~ API, since that's the - only one we need now. You can check it [[https://core.telegram.org/bots/api#sendmessage][here]]. 
- -So, by using the Telegram documentation, I came up with the following code: -#+BEGIN_SRC python -import requests - -chat_id = # Insert your chat ID here -telegram_bot_id = # Insert your Telegram bot ID here -telegram_data = { - "chat_id": chat_id - "parse_mode": "HTML", - "text": ("Hay citas!\nHay citas en el registro civil, para " - f"entrar ve a {SAE_URL}") -} -requests.post('https://api.telegram.org/bot{telegram_bot_id}/sendmessage', data=telegram_data) -#+END_SRC - -*** The complete script -I added a few loggers and environment variables and voilá! Here is the complete code: -#+BEGIN_SRC python -#!/usr/bin/env python3 - -import os -import requests -from datetime import datetime - -from selenium import webdriver -from selenium.webdriver.firefox.options import Options - -from dotenv import load_dotenv - -load_dotenv() # This loads the environmental variables from the .env file in the root folder - -TELEGRAM_BOT_ID = os.environ.get('TELEGRAM_BOT_ID') -TELEGRAM_CHAT_ID = os.environ.get('TELEGRAM_CHAT_ID') -SAE_URL = 'https://sae.mec.gub.uy/sae/agendarReserva/Paso1.xhtml?e=9&a=7&r=13' - -options = Options() -options.headless = True -d = webdriver.Firefox(options=options) -d.get(SAE_URL) -print(f'Headless Firefox Initialized {datetime.now()}') -elem = d.find_element_by_name('form:botonElegirHora') -elem.click() -try: - warning_message = d.find_element_by_id('form:warnSinCupos') - print('No dates yet') - print('------------------------------') -except Exception: - telegram_data = { - "chat_id": TELEGRAM_CHAT_ID, - "parse_mode": "HTML", - "text": ("Hay citas!\nHay citas en el registro civil, para " - f"entrar ve a {SAE_URL}") - } - requests.post('https://api.telegram.org/bot' - f'{TELEGRAM_BOT_ID}/sendmessage', data=telegram_data) - print('Dates found!') -d.close() # To close the browser connection -#+END_SRC - -Only one more thing to do, to deploy everything to my VPS - -*** Deploy and testing on the VPS -This was very easy. 
I just needed to pull my git repo, install the -~requirements.txt~ and set a new cron to run every 10 minutes and check the -site. The cron settings I used where: -#+BEGIN_SRC bash -*/10 * * * * /usr/bin/python3 /my/script/location/registro-civil-scraper/app.py >> /my/script/location/registro-civil-scraper/log.txt -#+END_SRC -The ~>> /my/script/location/registro-civil-scraper/log.txt~ part is to keep the logs on a new file. - -*** Did it work? -Yes! And it worked perfectly. I got a message the following day at 21:00 -(weirdly enough, that's 0:00GMT, so maybe they have their servers at GMT time -and it opens new appointments at 0:00). -[[/2020-08-02-170458.png]] - -*** Conclusion -I always loved to use programming to solve simple problems. With this script, I -didn't need to check the site every couple of hours to get an appointment, and -sincerely, I wasn't going to check past 19:00, so I would've never found it by -my own. - -My brother is having similar issues in Argentina, and when I showed him this, he -said one of the funniest phrases I've heard about my profession: - -> /"Programmers could take over the world, but they are too lazy"/ - -I lol'd way too hard at that. - -I loved Selenium and how it worked. Recently I created a crawler using Selenium, -Redis, peewee, and Postgres, so stay tuned if you want to know more about that. 
- -In the meantime, if you want to check the complete script, you can see it on my -Git instance: https://git.rogs.me/me/registro-civil-scraper or Gitlab, if you -prefer: https://gitlab.com/rogs/registro-civil-scraper - -* COMMENT Local Variables -# Local Variables: -# eval: (org-hugo-auto-export-mode) -# End: diff --git a/content/posts/how-i-got-a-residency-appointment-thanks-to-python-and-selenium.md b/content/posts/how-i-got-a-residency-appointment-thanks-to-python-and-selenium.md index fe7d46a..a080a6b 100644 --- a/content/posts/how-i-got-a-residency-appointment-thanks-to-python-and-selenium.md +++ b/content/posts/how-i-got-a-residency-appointment-thanks-to-python-and-selenium.md @@ -2,7 +2,7 @@ title = "How I got a residency appointment thanks to Python, Selenium and Telegram" author = ["Roger Gonzalez"] date = 2020-08-02 -lastmod = 2020-08-03T11:44:38-03:00 +lastmod = 2020-11-02T17:34:24-03:00 tags = ["python", "selenium", "telegram"] categories = ["programming"] draft = false diff --git a/content/posts/how-to-scrape-your-auth0-database-with-django.md b/content/posts/how-to-scrape-your-auth0-database-with-django.md new file mode 100644 index 0000000..885766f --- /dev/null +++ b/content/posts/how-to-scrape-your-auth0-database-with-django.md @@ -0,0 +1,270 @@ +--- +title: "How to create a management command and scrape your Auth0 database with Django" +date: 2020-06-30T09:40:48-03:00 +lastmod: 2020-06-30T09:40:48-03:00 +tags : [ "python", "django", "programming" ] +draft: true +--- + +Hello everyone! + +Some months ago, our data analyst needed to extract some data from Auth0 and match it with our profiles +database, so I decided we needed a new table to relate the Auth0 information with the profiles +information. + +# The solution + +This was a really easy but interesting task. 
The steps I came up with were: + +- Create a new model to save the data from Auth0 +- Create a new management command +- Create a cron to run the management command every night + +# Creating the model + +The model was really easy, I just created a field for each of the fields I want: + +```python +class Auth0Data(models.Model): + """Auth0 data straight from auth0""" + profile = models.ForeignKey(Profile, on_delete=models.CASCADE) + name = models.CharField(max_length=255, blank=True, null=True) + last_name = models.CharField(max_length=255, blank=True, null=True) + email = models.CharField(max_length=255, blank=True, null=True) + last_ip = models.CharField(max_length=255, blank=True, null=True) + login_count = models.CharField(max_length=255, blank=True, null=True) + last_login = models.DateTimeField(null=True, blank=True) + email_verified = models.BooleanField(default=False) + created_at = models.DateTimeField() + updated_at = models.DateTimeField() + + def __str__(self): + return str(self.profile) +``` + +# Creating the management command + +I want to run the command with a cron, so I need a way to run it outside of Django's runtime. I want the +command to run as `python manage.py scrape_auth0`. In order to do this, I have to create a specific +folder structure inside my application. + +Let's assume my Project is called `library` and my Django app is called `book`. I have to create the +following folder structure: +``` +├── library # my project root +│   ├── book # my app name +│   │   ├── management +│   │   │   ├── commands +│   │   │   │   ├── scrape_auth0.py +│   │   │   ├── __init__.py +``` + +First, let's create the folders we need. On the root of our application we can run: + +```sh +mkdir -p book/management/commands +touch book/management/commands/__init__.py +touch book/management/commands/scrape_auth0.py +``` + +In case you don't know, the `__init__.py` file is used to identify Python packages. 
+ +Now, you can open `scrape_auth0.py` on your text editor and start creating your command! + +The basic structure to create a command is: + +```python +from django.core.management.base import BaseCommand + +class Command(BaseCommand): + + def handle(self, *args, **options): + # my command here +``` + +What's going on here? +- First we create the class "Command", and inherit from `BaseCommand`. Every command has to inherit from it +- Inside the class, we need to override the `handle` function. That's where we are going to write our command. + +## The complete Auth0 command + +Here is the entire command: + +```python +import time +import csv +import requests +import gzip +import json +import datetime +import os + +from django.core.management.base import BaseCommand +from core.models import Profile, Auth0Data + +token = os.environ.get('AUTH0_MANAGEMENT_TOKEN') +headers = {'Authorization': f'Bearer {token}', + 'Content-Type': 'application/json'} + + +class Command(BaseCommand): + """Django command to load all the pincodes in the db""" + + def handle(self, *args, **options): + self.stdout.write('Scraping...') + self.stdout.write('Getting the connections...') + + connections = requests.get( + 'https://destapp.auth0.com/api/v2/connections', + headers=headers + ).json() + + self.stdout.write('Connections found!') + + for connection in connections: + connection_id = connection['id'] + connection_name = connection['name'] + self.stdout.write( + f'Working with connection {connection_name}, ' + f'{connection_id}') + + data = json.dumps({ + 'connection_id': connection_id, + 'format': 'csv', + 'limit': 99999999, + 'fields': [ + { + 'name': 'user_id' + }, + { + 'name': 'family_name' + }, + { + 'name': 'given_name' + }, + { + 'name': 'email' + }, + { + 'name': 'last_ip' + }, + { + 'name': 'logins_count' + }, + { + 'name': 'created_at' + }, + { + 'name': 'updated_at' + }, + { + 'name': 'last_login' + }, + { + 'name': 'email_verified' + } + ] + }) + + self.stdout.write('Generating 
job...') + job = requests.post( + 'https://destapp.auth0.com/api/v2/jobs/users-exports', + data=data, + headers=headers + ) + job_id = job.json()['id'] + self.stdout.write(f'The job ID is {job_id}') + time.sleep(5) + + job_is_running = True + + while job_is_running: + check_job = requests.get( + f'https://destapp.auth0.com/api/v2/jobs/{job_id}', + headers=headers + ).json() + + status = check_job['status'] + + if status == 'pending': + self.stdout.write('Job has not started, waiting') + time.sleep(30) + elif status == 'processing': + percentage_done = check_job['percentage_done'] + seconds_left = datetime.timedelta( + seconds=check_job['time_left_seconds']) + + self.stdout.write(f'Procesed: {percentage_done}%') + self.stdout.write(f'Time left: {seconds_left}') + time.sleep(10) + elif status == 'completed': + job_is_running = False + self.stdout.write('100%') + self.stdout.write('Data is ready!') + export_url = check_job['location'] + + export_data = requests.get(export_url, stream=True) + + file_location = 'core/management/commands/auth0_file.csv.gz' + + with open(file_location, 'wb') as f: + self.stdout.write('Downloading the file') + for chunk in export_data.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + f.flush() + self.stdout.write('File ready!') + f.close() + + with gzip.open(file_location, 'rt') as f: + reader = csv.reader(f, delimiter=',', quotechar='"') + for row in reader: + auth0_id = row[0].replace('|', '.') + last_name = row[1] + name = row[2] + email = row[3] + last_ip = row[4] + login_count = row[5] + created_at = row[6] + updated_at = row[7] + last_login = None + if row[8] != '': + last_login = row[8] + email_verified = False + if row[9] == 'true': + email_verified = True + + try: + profile = Profile.objects.get(auth0_id=auth0_id) + auth0_data = Auth0Data.objects.get(profile=profile) + + auth0_data.name = name + auth0_data.last_name = last_name + auth0_data.email = email + auth0_data.last_ip = last_ip + auth0_data.login_count = 
login_count + auth0_data.created_at = created_at + auth0_data.updated_at = updated_at + auth0_data.last_login = last_login + auth0_data.email_verified = email_verified + + auth0_data.save() + self.stdout.write(f'Updated Auth0Data for {profile}') + + except Auth0Data.DoesNotExist: + Auth0Data.objects.create( + profile=profile, + name=name, + last_name=last_name, + email=email, + last_ip=last_ip, + login_count=login_count, + created_at=created_at, + updated_at=updated_at, + last_login=last_login, + email_verified=email_verified + ) + self.stdout.write(f'Created Auth0Data for {profile}') + except Profile.DoesNotExist: + pass +``` -- cgit v1.2.3