Píldora SEOplus 2022

juande2marin
julio 23, 2022

Os dejo el script hecho en Python para identificar los niveles de profundidad.

Seguiré mejorando este código y publicando más scripts en Python para SEO.

import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Root of the site to crawl: scheme + host, without a trailing slash.
rana = "https://nred.es"
# Home page URL; it is the only depth-0 seed of the crawl.
dom = f"{rana}/"
# Prefix of links that the crawler leaves out of the crawl queue.
img = f"{rana}/wp-content"
nivelCero = [dom]

# Working lists (four independent list objects):
#   listaEnlace    - internal links collected by Sumando across all calls
#   listaRes       - URLs to fetch at the current depth level
#   listaResultado - URL column of the final report
#   listaNivel     - depth column of the final report, parallel to listaResultado
listaEnlace, listaRes, listaResultado, listaNivel = [], [], [], []

# Links that point under ``img`` (wp-content); collected here, never crawled.
# The original code appended to this name without defining it (NameError).
listaImagenes = []


def Sumando(lista):
    """Fetch every URL in ``lista`` and collect the internal links on those pages.

    Each page is downloaded and every ``<a href>`` is inspected:

    * site-relative hrefs (``/path``) are made absolute by prefixing ``rana``;
    * absolute hrefs are kept only when they belong to ``rana`` itself;
    * protocol-relative hrefs (``//host/...``) and ``/cdn-cgi/`` paths are
      ignored;
    * links under ``img`` go to ``listaImagenes`` instead of the crawl list,
      whether they were written as absolute or relative hrefs.

    Accepted links are appended to the module-level ``listaEnlace``, which
    keeps growing across calls.

    Args:
        lista: iterable of absolute URLs to download.

    Returns:
        A de-duplicated list (arbitrary order) of every link stored in
        ``listaEnlace`` so far, including links found by earlier calls.
        An empty ``lista`` returns the links already accumulated instead of
        failing.
    """
    # An absolute href is internal only if the host ends right after ``rana``;
    # a bare startswith(rana) would also accept e.g. "https://nred.es.example.com".
    prefijos_internos = (rana + "/", rana + "?", rana + "#")

    for uno in lista:
        try:
            # The timeout keeps one unresponsive page from stalling the crawl.
            r = requests.get(
                uno,
                headers={"User-Agent": "Chtome/50.0.2661.94"},
                timeout=30,
            )
        except requests.RequestException as error:
            # A single unreachable URL must not abort the whole crawl.
            print("No se pudo descargar {}: {}".format(uno, error))
            continue

        # Name the parser explicitly so results do not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(r.content, "html.parser")
        for ur in soup.find_all('a'):
            href = ur.get('href')
            if href is None:
                continue
            # "//host/path" is protocol-relative (another host), not a local path.
            if href.startswith("//") or href.startswith("/cdn-cgi/"):
                continue

            if href.startswith("/"):
                enlace = rana + href
            elif href == rana or href.startswith(prefijos_internos):
                enlace = href
            else:
                continue  # external link, mailto:, anchors, etc.

            if enlace.startswith(img):
                listaImagenes.append(enlace)
            else:
                listaEnlace.append(enlace)

    # De-duplicate once, after all pages have been processed.
    return list(set(listaEnlace))

# Start from the home page. listaSuma holds every URL already fetched or queued.
listaRes = nivelCero
listaSuma = []

# ------------------------------------------------------------ depth levels ---
# Breadth-first crawl: each pass fetches the URLs discovered by the previous
# pass, for at most 9 levels below the home page.
for n in range(9):
    nivel = n + 1
    listaUnica = Sumando(listaRes)
    vistas = set(listaSuma + listaRes)
    listaSuma = list(vistas)
    # Keep only URLs that were not fetched or queued at an earlier level.
    listaRes = [url for url in listaUnica if url not in vistas]

    if not listaRes:
        # Nothing new was discovered: the crawl is complete.
        print("Fin del rastreo Nivel {}".format(nivel))
        break

    listaResultado.extend(listaRes)
    listaNivel.extend([nivel] * len(listaRes))
    # Per-level preview; pandas repeats the scalar level on every row.
    dicClic = {'URL': listaRes, 'Nivel': nivel}
    frames = pd.DataFrame(dicClic)
    print(frames)

# Final report: one row per discovered URL with the level at which it was found.
miDicc = {'URL': listaResultado, 'Nivel': listaNivel}

frames = pd.DataFrame(miDicc)
frames.to_csv('crawler_seo.csv', encoding='utf-8-sig')
# Bare expression: presumably kept so a notebook cell displays the table.
frames

Ver este vídeo en YouTube

Juande Marín

Profesor de marketing digital, divulgador de inteligencia artificial y neuroeducación. Especializado en posicionamiento en buscadores y diseño web. Autor de varios libros relacionados con el comercio electrónico y el marketing digital (McGraw Hill, Paraninfo…). Juande2marin