Hello,
I have a problem with my code when running it inside a Python Source node. Outside the node, in a Jupyter Notebook, it works fine.
The errors:
init() got an unexpected keyword argument ‘service’
Traceback (most recent call last):
File “”, line 17, in
TypeError: init() got an unexpected keyword argument ‘service’
Timeout value connect was <object object at 0x000001CA21F39B50>, but it must be an int, float or None.
Traceback (most recent call last):
File “”, line 17, in
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\selenium\webdriver\chrome\webdriver.py”, line 76, in init
RemoteWebDriver.init(
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\selenium\webdriver\remote\webdriver.py”, line 157, in init
self.start_session(capabilities, browser_profile)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\selenium\webdriver\remote\webdriver.py”, line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\selenium\webdriver\remote\webdriver.py”, line 319, in execute
response = self.command_executor.execute(driver_command, params)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\selenium\webdriver\remote\remote_connection.py”, line 374, in execute
return self._request(command_info[0], url, body=data)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\selenium\webdriver\remote\remote_connection.py”, line 397, in _request
resp = self._conn.request(method, url, body=body, headers=headers)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3_request_methods.py”, line 118, in request
return self.request_encode_body(
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3_request_methods.py”, line 217, in request_encode_body
return self.urlopen(method, url, **extra_kw)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\poolmanager.py”, line 432, in urlopen
conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\poolmanager.py”, line 303, in connection_from_host
return self.connection_from_context(request_context)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\poolmanager.py”, line 328, in connection_from_context
return self.connection_from_pool_key(pool_key, request_context=request_context)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\poolmanager.py”, line 351, in connection_from_pool_key
pool = self._new_pool(scheme, host, port, request_context=request_context)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\poolmanager.py”, line 265, in _new_pool
return pool_cls(host, port, **request_context)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\connectionpool.py”, line 196, in init
timeout = Timeout.from_float(timeout)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\util\timeout.py”, line 190, in from_float
return Timeout(read=timeout, connect=timeout)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\util\timeout.py”, line 119, in init
self._connect = self._validate_timeout(connect, “connect”)
File “C:\Users\gberan\Desktop\Anaconda\envs\py3_knime\lib\site-packages\urllib3\util\timeout.py”, line 156, in _validate_timeout
raise ValueError(
ValueError: Timeout value connect was <object object at 0x000001CA21F39B50>, but it must be an int, float or None.
And my code:
import time

import pandas as pd
from pandas import DataFrame
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Resolve the ChromeDriver binary once (the original called install() twice)
# and wrap it in a Service object.
driver_path = ChromeDriverManager().install()
service = Service(driver_path)
# Open the browser window.
try:
    # Selenium 4 API: the driver location is passed through `service`.
    driver = webdriver.Chrome(service=service)
except TypeError:
    # Older Selenium releases reject the `service` keyword with a TypeError
    # ("got an unexpected keyword argument 'service'"); fall back to the
    # legacy `executable_path` keyword they expect.
    # NOTE(review): if this fallback then fails with "Timeout value connect
    # was <object object ...>", the installed Selenium is presumably too old
    # for the installed urllib3 -- upgrade selenium (or pin urllib3<2) in the
    # environment the node uses; confirm the installed versions.
    driver = webdriver.Chrome(executable_path=driver_path)
# Load the Interpol Red Notices listing page.
website = 'https://www.interpol.int/How-we-work/Notices/Red-Notices/View-Red-Notices'
driver.get(website)
# Maximize the browser window.
driver.maximize_window()
# Fixed 5-second pause so the page can render after opening.
time.sleep(5)
# Locate the close icon of the cookie banner and click it to dismiss the banner.
search_button = driver.find_element(By.XPATH, "//i[@class='privacy-cookie-banner__icon-close']")
search_button.click()
# Empty lists that will receive the scraped values.
name, ages, country = [], [], []
# Nationalities to query (currently only Brazil).
testes = ['Brazil']
# Read the label of every option in the page's nationality selector.
select_countries = Select(driver.find_element(By.XPATH, '//select[@id="nationality"]'))
country_2 = [option.text for option in select_countries.options]
# Drop the first entry (per the original author it is an empty placeholder).
del country_2[0]
# Scrape the notices for every country in `testes`, one age at a time, walking
# through all result pages of each query. The browser is always closed at the
# end, even if scraping fails part-way through.
try:
    for pais in testes:
        select_countries.select_by_visible_text(pais)
        # Query each age from 18 through 99 (range() excludes 100).
        for age in range(18, 100):
            min_age_input = driver.find_element(by='xpath', value='//input[@id="ageMin"]')
            min_age_input.clear()  # Clear the minimum-age field
            min_age_input.send_keys(str(age))  # Type the minimum age
            max_age_input = driver.find_element(by='xpath', value='//input[@id="ageMax"]')
            max_age_input.clear()  # Clear the maximum-age field
            max_age_input.send_keys(str(age))  # Type the maximum age
            # Locate the search button and click it.
            search_button = driver.find_element(by='xpath', value="//button[@id='submit' and @type='submit']")
            search_button.click()
            time.sleep(5)
            # Locate the pagination bar and all <li> entries inside it.
            pagination = driver.find_element(by='xpath', value='//div[contains(@id, "paginationPanel")]')
            pages = pagination.find_elements(by='xpath', value='.//li')
            # First page is 1; the last page number is read from the
            # second-to-last <li> when at least two entries exist.
            initial_page = 1
            if len(pages) >= 2:
                last_page = int(pages[-2].text)
            else:
                last_page = initial_page
            # Visit every result page of the current query.
            while initial_page <= last_page:
                try:
                    # Explicit waits for the result container and its items.
                    container = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, '//div[contains(@class, "redNoticesList__list")]')))
                    products = WebDriverWait(container, 5).until(EC.presence_of_all_elements_located((By.XPATH, './/div[contains(@class, "redNoticesList__item notice_red")]')))
                except (TimeoutException, StaleElementReferenceException):
                    # Empty page: reset the list so items from the previous
                    # page are not appended a second time.
                    products = []
                for product in products:
                    try:
                        # Read all three fields before appending anything so
                        # the three lists always stay row-aligned.
                        record = (
                            product.find_element(by='xpath', value=".//div[@class='redNoticeItem__labelText']").text,
                            product.find_element(by='xpath', value='.//span[@class="age"]').text,
                            product.find_element(by='xpath', value='.//span[@class="nationalities"]').text,
                        )
                    except (NoSuchElementException, StaleElementReferenceException):
                        # Skip an item that lacks one of the fields.
                        continue
                    name.append(record[0])
                    ages.append(record[1])
                    country.append(record[2])
                try:
                    # Find the "next page" arrow and click it.
                    next_button = driver.find_element(by='xpath', value="//a[@class='nextIndex right-arrow']")
                    next_button.click()
                except WebDriverException:
                    # No usable "next" button: stop paging instead of
                    # scraping the same page again.
                    break
                # Advance the page counter for the next iteration.
                initial_page += 1
finally:
    # Release the browser and the ChromeDriver process.
    driver.quit()
# Pad the shorter lists with empty strings so all three have the same length,
# then build the output table with pandas.
max_length = max(len(name), len(ages), len(country))
for column in (name, ages, country):
    column.extend([''] * (max_length - len(column)))
output_table = pd.DataFrame({'Name': name, 'Age': ages, 'Country': country})
Could someone help me, please?