In [2]:
from time import sleep
from selenium.webdriver.chrome.service import Service
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
import warnings
def main():
    """Search Zhihu for a fixed keyword and save the first result's comments.

    Drives Chrome through Selenium: opens the Zhihu home page, pauses for
    30 seconds (presumably to leave time for a manual login -- confirm),
    searches for the keyword, expands the first result, opens its comment
    panel and writes the text of each comment to the module-level file
    object ``f`` (one comment per line), echoing it to stdout as well.

    The function takes no arguments and returns ``None``. A writable text
    file bound to the module-level name ``f`` must exist before the call.

    Raises:
        selenium.common.exceptions.WebDriverException: if the browser cannot
            be started or one of the page elements located before the
            comment list is missing. The browser session is shut down
            before the error propagates.
    """
    # Local import: the exception type is only needed by this function.
    from selenium.common.exceptions import WebDriverException

    # Silence warnings for the rest of the run.
    warnings.filterwarnings("ignore")

    # The driver binary must be resolvable from the working directory or
    # PATH; otherwise Selenium falls back to Selenium Manager to find one.
    service = Service('chromedriver.exe')
    options = ChromeOptions()
    # Drop the switches that mark the browser as automation-controlled.
    options.add_experimental_option(
        'excludeSwitches', ['enable-automation', 'enable-logging'])
    options.add_experimental_option('useAutomationExtension', False)

    driver = Chrome(service=service, options=options)
    try:
        # The implicit wait is a session-wide setting, so set it up front:
        # every element lookup below then polls for up to 20 seconds.
        driver.implicitly_wait(20)

        # Hide navigator.webdriver from scripts on every new document.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => false
                })
            """
        })

        # Open Zhihu, then pause before interacting with the page.
        driver.get('https://www.zhihu.com/')
        sleep(30)

        # Focus the search box and type the keyword.
        driver.find_element(By.ID, 'Popover1-toggle').click()
        driver.find_element(By.ID, 'Popover1-toggle').send_keys('汉江大学')
        sleep(2)

        # Click the search icon.
        driver.find_element(
            By.XPATH,
            '//*[@id="root"]/div/div[2]/header/div[2]/div[1]/div/form/div/div/label/button').click()

        # NOTE(review): title and content are fetched but never written out
        # or returned; the lookups still fail loudly if the first search
        # result is missing. Confirm whether they should be saved too.
        title = driver.find_element(
            By.XPATH,
            '//*[@id="SearchMain"]/div/div/div/div/div[2]/div/div/div/h2/div/a/span').text

        # Expand the first result ("read full text").
        driver.find_element(
            By.XPATH,
            '//*[@id="SearchMain"]/div/div/div/div/div[2]/div/div/div/div/span/div/button').click()
        sleep(2)

        content = driver.find_element(
            By.XPATH,
            '//*[@id="SearchMain"]/div/div/div/div/div[2]/div/div/div/div/span[1]/div/span/p').text

        # Open the comment panel.
        driver.find_element(
            By.XPATH,
            '//*[@id="SearchMain"]/div/div/div/div/div[2]/div/div/div/div/div[3]/div/div/button[1]').click()
        sleep(2)

        # Ask for more comments.
        driver.find_element(
            By.XPATH,
            '//*[@id="SearchMain"]/div/div/div/div/div[2]/div/div/div/div[2]/div/div/div[2]/div[2]/div/div[3]/button').click()
        sleep(2)

        # One node per comment.
        comment_nodes = driver.find_elements(
            By.XPATH,
            '/html/body/div[6]/div/div/div[2]/div/div/div/div[2]/div[3]/div')

        for node in comment_nodes:
            try:
                comment = node.find_element(
                    By.XPATH, './div/div/div[2]').text
            except WebDriverException as exc:
                # Best effort, as before: stop at the first node that does
                # not look like a comment, but say so instead of hiding it.
                print(f'Stopped reading comments: {exc}')
                break
            f.write(f'{comment}\n')  # persist to the output file
            print(comment)
    finally:
        # quit() ends the whole session (browser and driver process) on
        # success and on failure alike.
        driver.quit()
if __name__ == '__main__':
    # Open the output file in append mode and bind it to the module-level
    # name f, which main() writes the scraped comments to.
    with open(file='05.txt', mode='a', encoding='utf-8') as f:
        main()
Unable to obtain driver using Selenium Manager: Selenium Manager failed for: D:\Python310\lib\site-packages\selenium\webdriver\common\windows\selenium-manager.exe --browser chrome --output json. The chromedriver version cannot be discovered
--------------------------------------------------------------------------- SeleniumManagerException Traceback (most recent call last) Cell In[2], line 64 61 if __name__ == '__main__': 62 # 创建文件存储数据 63 with open('05.txt','a',encoding='utf-8')as f: ---> 64 main() Cell In[2], line 16, in main() 14 options.add_experimental_option('useAutomationExtension', False) 15 # 创建一个浏览器 ---> 16 driver = Chrome(service=service,options=options) 17 # 绕过检测 18 driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { 19 "source": """ 20 Object.defineProperty(navigator, 'webdriver', { (...) 23 """ 24 }) File D:\Python310\lib\site-packages\selenium\webdriver\chrome\webdriver.py:82, in WebDriver.__init__(self, executable_path, port, options, service_args, desired_capabilities, service_log_path, chrome_options, service, keep_alive) 80 if not service: 81 service = Service(executable_path, port, service_args, service_log_path) ---> 82 service.path = DriverFinder.get_path(service, options) 84 super().__init__( 85 DesiredCapabilities.CHROME["browserName"], 86 "goog", (...) 
93 keep_alive, 94 ) File D:\Python310\lib\site-packages\selenium\webdriver\common\driver_finder.py:43, in DriverFinder.get_path(service, options) 41 except WebDriverException as err: 42 logger.warning("Unable to obtain driver using Selenium Manager: " + err.msg) ---> 43 raise err 45 return path File D:\Python310\lib\site-packages\selenium\webdriver\common\driver_finder.py:40, in DriverFinder.get_path(service, options) 37 @staticmethod 38 def get_path(service: Service, options: BaseOptions) -> str: 39 try: ---> 40 path = shutil.which(service.path) or SeleniumManager().driver_location(options) 41 except WebDriverException as err: 42 logger.warning("Unable to obtain driver using Selenium Manager: " + err.msg) File D:\Python310\lib\site-packages\selenium\webdriver\common\selenium_manager.py:101, in SeleniumManager.driver_location(self, options) 98 args.append("--browser-path") 99 args.append(str(binary_location)) --> 101 result = self.run(args) 102 executable = result.split("\t")[-1].strip() 103 logger.debug(f"Using driver at: {executable}") File D:\Python310\lib\site-packages\selenium\webdriver\common\selenium_manager.py:122, in SeleniumManager.run(args) 120 result = output["result"]["message"] 121 if completed_proc.returncode: --> 122 raise SeleniumManagerException(f"Selenium Manager failed for: {command}.\n{result}{stderr}") 123 else: 124 # Selenium Manager exited successfully, return executable path and print warnings 125 for item in output["logs"]: SeleniumManagerException: Message: Selenium Manager failed for: D:\Python310\lib\site-packages\selenium\webdriver\common\windows\selenium-manager.exe --browser chrome --output json. The chromedriver version cannot be discovered
In [ ]: