I'm writing a program to scrape some websites, and I've run into a problem with one of them. On the others I've found my way using Selenium + BS4 to get the information I need and to navigate the pages.
The page is this one: https://www.borm.es/#/home/sumario/21-11-2020
The objective is to get all the paragraphs with the class ng-binding, along with the "VER ANUNCIO" link that each one has below it.
Usually I would use soup.find_all() to get all of them and navigate the tree or use Selenium to get all the elements using XPATH/CSS SELECTOR.
The problem I'm facing is that find_all() or find() returns nothing (an empty list or None), and Selenium returns None too.
I've checked whether the elements are inside a frame, and I don't think they are. I've tried a WebDriver wait in case the page needs to finish loading before I do anything. Different classes/tags give the same result.
Now, when I print the BeautifulSoup object, it shows this instead of the HTML code I see when inspecting the page:
<!DOCTYPE html>
<html data-ng-app="BormApp" lang="es">
<head>
<title>Sede Electrónica del Boletín Oficial de la Región de Murcia</title>
<meta content="zjUBcjStVhysvi2ANOAn6-FG7aKKif43J62Ifad9JjA" name="google-site-verification"/>
<!-- META ROBOTS -->
<meta content="index,follow,all" name="robots"/>
<!-- METAS GENÉRICAS -->
<meta charset="utf-8"/>
<base href="/"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="Sede Electrónica del Boletín Oficial de la Región de Murcia" name="title"/>
<meta content="Sede Electrónica del Boletín Oficial de la Región de Murcia" name="description"/>
<meta content="borm" name="author"/>
<!-- CARGA DE FUENTES -->
<link href="https://fonts.googleapis.com/css?family=Open+Sans:300,400,700" rel="stylesheet"/>
<link href="https://fonts.googleapis.com/css?family=Coiny" rel="stylesheet"/>
<!-- CSS -->
<link href="resources/css/main.css" media="screen" rel="stylesheet"/>
<link href="resources/css/bootstrap.min.css" media="screen" rel="stylesheet"/>
<link href="resources/css/estilos.css" media="screen" rel="stylesheet"/>
<link href="resources/css/imprimir.css" media="print" rel="stylesheet"/>
<link crossorigin="anonymous" href="https://use.fontawesome.com/releases/v5.0.13/css/all.css" integrity="sha384-DNOHZ68U8hZfKXOrtjWvjxusGo9WQnrNx2sqG0tfsghAvtVlRW3tvkXWZh58N9jp" rel="stylesheet"/>
<link href="resources/css/bootstrap-datetimepicker.css" rel="stylesheet"> <!-- datepiker -->
<link href="resources/css/bootstrap-select.min.css" rel="stylesheet"/> <!-- select -->
<!-- Propio -->
<link href="resources/css/site.css" rel="stylesheet"/>
<link href="resources/css/jquery-ui.css" rel="stylesheet"/>
<!-- Estilos -->
<link href="bower_components/metisMenu/dist/metisMenu.min.css" rel="stylesheet"/>
<link href="bower_components/angular-loading-bar/build/loading-bar.min.css" rel="stylesheet"/>
<link href="resources/css/bootstrap-year-calendar.min.css" rel="stylesheet"/>
<meta content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no" name="viewport"/>
<!-- FAVICON -->
<link href="resources/images/favicon.png" rel="icon" type="image/png">
<!--[if IE]><link rel="shortcut icon" type="image/x-icon" href="resources/images/favicon.png" /><![endif]-->
<link href="./favicon.ico" rel="shortcut icon" type="image/x-icon">
<link href="resources/images/favicon.png" rel="apple-touch-icon">
<!-- PLUGINS GENERALES -->
<script src="resources/js/jquery.js"></script>
<script src="resources/js/jquery-ui.js"></script>
<script src="resources/js/moment.min.js"></script> <!-- datepiker -->
<script src="resources/js/es.js"></script> <!-- datepiker -->
<script src="resources/js/bootstrap.min.js"></script>
<script src="resources/js/bootstrap-hover-dropdown.min.js"></script>
<script src="resources/js/bootstrap-datetimepicker.min.js"></script> <!-- datepiker -->
<script src="resources/js/bootstrap-select.min.js"></script> <!-- select -->
<script src="resources/js/defaults-es_ES.js"></script>
<!-- Javascript -->
<script src="bower_components/angular/angular.js"></script>
<script src="bower_components/angular-ui-router/release/angular-ui-router.min.js"></script>
<script src="bower_components/json3/lib/json3.min.js"></script>
<script src="bower_components/oclazyload/dist/ocLazyLoad.min.js"></script>
<script src="bower_components/angular-loading-bar/build/loading-bar.min.js"></script>
<script src="bower_components/angular-bootstrap/ui-bootstrap-tpls.min.js"></script>
<script src="bower_components/metisMenu/dist/metisMenu.min.js"></script>
<script src="bower_components/moment/min/moment-with-locales.min.js"></script>
<script src="bower_components/angular-sanitize/angular-sanitize.min.js"></script>
<script src="bower_components/angular-translate/angular-translate.min.js"></script>
<script src="bower_components/angular-dynamic-locale/dist/tmhDynamicLocale.js"></script>
<script src="https://code.angularjs.org/1.6.6/i18n/angular-locale_es-es.js"></script>
<script src="resources/js/bootstrap-year-calendar.min.js"></script>
<script src="scripts/i18n/messages_es.js"></script>
<script src="scripts/i18n/messages_en.js"></script>
<!-- Application Modules -->
<script src="scripts/app.js"></script>
<script src="scripts/boletines/controllers/boletinController.js"></script>
<script src="scripts/suplementos/controllers/suplementoController.js"></script>
<script src="scripts/buzonSugerencias/controllers/sugerenciaController.js"></script>
<script src="scripts/homeBorm/controllers/homeController.js"></script>
<script src="scripts/sumario/controllers/sumarioController.js"></script>
<script src="scripts/anunciantes/controllers/anunciantesController.js"></script>
<script src="scripts/anunciante/controllers/anuncianteController.js"></script>
<script src="scripts/anuncio/controllers/anuncioController.js"></script>
<script src="scripts/calendario/controllers/calendarioController.js"></script>
<script src="scripts/sumarioSuplementos/controllers/sumarioSuplementoController.js"></script>
<script src="scripts/ayuntamientos/controllers/ayuntamientosController.js"></script>
<script src="scripts/ayuntamiento/controllers/ayuntamientoController.js"></script>
<script src="scripts/noticias/controllers/noticiasController.js"></script>
<script src="scripts/masVisitado/controllers/masVisitadoController.js"></script>
<script src="scripts/normativaGeneral/controllers/normativaGeneralController.js"></script>
<script src="scripts/plantillasEncuestas/controllers/plantillasEncuestasController.js"></script>
<script src="scripts/preguntasFrecuentes/controllers/preguntasFrecuentesController.js"></script>
<script src="scripts/calendarioSede/controllers/calendarioSedeController.js"></script>
<script src="scripts/directorio/controllers/directorioController.js"></script>
<script src="scripts/datosAbiertos/controllers/datosAbiertosController.js"></script>
<script src="scripts/suscripcion/controllers/suscripcionController.js"></script>
<script src="scripts/normativa/controllers/normativaController.js"></script>
<script src="scripts/enlaces/controllers/enlacesController.js"></script>
<script src="scripts/buscador/controllers/buscadorController.js"></script>
<script src="scripts/rss/controllers/rssController.js"></script>
<script src="scripts/suscripcion/controllers/loginController.js"></script>
<script src="scripts/eli/controllers/jurisdictionController.js"></script>
<script src="scripts/eli/controllers/typeController.js"></script>
<script src="scripts/eli/controllers/yearController.js"></script>
<script src="scripts/eli/controllers/monthController.js"></script>
<script src="scripts/eli/controllers/dayController.js"></script>
<script src="scripts/eli/controllers/numberController.js"></script>
<script src="scripts/eli/controllers/languageController.js"></script>
<script src="scripts/eli/controllers/normativaEliController.js"></script>
<script src="scripts/eli/controllers/corrigendumController.js"></script>
<script src="scripts/eli/controllers/normativaCorrigendumController.js"></script>
<!-- Application Services -->
<script src="scripts/buzonSugerencias/services/sugerenciaService.js"></script>
<script src="scripts/suscripcion/services/suscripcionService.js"></script>
<!-- Application Factories -->
<script src="scripts/suplementos/factories/suplementoFactory.js"></script>
<script src="scripts/boletines/factories/boletinFactory.js"></script>
<script src="scripts/buzonSugerencias/factories/sugerenciaFactory.js"></script>
<script src="scripts/sumario/factories/sumarioFactory.js"></script>
<script src="scripts/anuncio/factories/anuncioFactory.js"></script>
<script src="scripts/calendario/factories/calendarioFactory.js"></script>
<script src="scripts/sumarioSuplementos/factories/sumarioSuplementoFactory.js"></script>
<script src="scripts/ayuntamientos/factories/ayuntamientosFactory.js"></script>
<script src="scripts/noticias/factories/noticiasFactory.js"></script>
<script src="scripts/normativaGeneral/factories/normativaGeneralFactory.js"></script>
<script src="scripts/noticias/factories/noticiasFactory.js"></script>
<script src="scripts/plantillasEncuestas/factories/plantillasEncuestasFactory.js"></script>
<script src="scripts/preguntasFrecuentes/factories/preguntasFrecuentesFactory.js"></script>
<script src="scripts/calendarioSede/factories/calendarioSedeFactory.js"></script>
<script src="scripts/normativa/factories/normativaFactory.js"></script>
<script src="scripts/enlaces/factories/enlacesFactory.js"></script>
<script src="scripts/suscripcion/factories/suscripcionFactory.js"></script>
<script src="scripts/buscador/factories/buscadorFactory.js"></script>
<script src="scripts/rss/factories/rssFactory.js"></script>
<script src="scripts/eli/factories/eliFactory.js"></script>
<!-- Commons Services -->
<script src="scripts/commons/services/alertsService.js"></script>
<script src="scripts/commons/services/deviceService.js"></script>
<script src="scripts/commons/services/breadCrumbService.js"></script>
<!-- Interceptors -->
<script src="scripts/commons/factories/httpResponseErrorHandlerFactory.js"></script>
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-23666061-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-23666061-1');
</script>
<script>
/**
* Función que realiza un seguimiento de un clic en un enlace saliente en Analytics.
* Esta función toma una cadena de URL válida como argumento y la utiliza
* como la etiqueta del evento. Configurar el método de transporte como "beacon" permite que el hit se envíe
* con "navigator.sendBeacon" en el navegador que lo admita.
*/
var trackOutboundLink = function(url) {
gtag('event', 'clic', {
'event_category': 'saliente',
'event_label': url,
'transport_type': 'beacon',
'event_callback': ''
});
}
</script>
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
<script src="https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js"></script>
<![endif]-->
</link></link></link></link></head>
<body>
<h1 class="hidden">UNO</h1>
<a class="hidden" href="https://www.borm.es/#/home/mapaWeb" title="map">Mapa web</a>
<a class="hidden" href="https://www.borm.es/accesibilidad.html" title="Accessibility">Accesibilidad</a>
<div>
<div data-ui-view=""></div>
</div>
</body>
</html>
While searching I've found this: Web-scraping JavaScript page with Python
I'm too new to this and I just don't understand how to apply it to my case, but I think it's pointing in the right direction.
So my questions are: what is going on in this webpage, and could you point me in the right direction? Maybe what the term is that I should be looking for, or how I can use Selenium or BS4 to get what I'm looking for.
Thanks in advance and have a nice weekend!
EDIT: In case somebody needs this, these are several pieces of the code I'm running, which return empty lists or None when locating elements of the webpage.
soup = BeautifulSoup(requests.get(browser.current_url).content, 'html.parser')
# finds all the disposiciones
lista_disposiciones = soup.find_all("div", {"class": "ng-binding"})
lista_disposiciones = soup.find_all("div", {"class": "ANYTHING THAT GOES HERE RETURNS NOTHING"})
lista_disposiciones = webdriver.find_elements(WHATEVER I USE TO LOCATE ELEMENTS RETURN NOTHING)