国产成人精品无码青草_亚洲国产美女精品久久久久∴_欧美人与鲁交大毛片免费_国产果冻豆传媒麻婆精东

所在位置：首頁 > 營銷資訊 > 網站運營 > python selenium 爬蟲模擬瀏覽網站內容

python selenium 爬蟲模擬瀏覽網站內容

時間：2023-04-24 12:51:01 | 來源：網站運營

時間：2023-04-24 12:51:01 來源：網站運營

python selenium 爬蟲模擬瀏覽網站內容：使用 python selenium 編寫的爬蟲代碼，模擬用戶瀏覽某個網站內容，廢話少說，進入正文。
1、爬蟲界面如下：

界面使用說明：
第一步：填寫要訪問的網站地址
第二步：填寫每天訪問該網址的次數
第三步：點擊“開始刷量”按鈕開始訪問網站內容
2、爬蟲源代碼介紹：
1）點擊“開始刷量”按鈕調用runjob方法，runjob具體代碼如下：
# Entry point for one site-visit job (the article says the "start" button calls it).
def runjob():
    """Run a single "direct visit" pass against the configured site.

    Reads the target URL from ``myframe.siteurlinput`` and the current run
    counter from ``myframe.refreshnum``, creates a browser with
    ``createWebDriver()``, and browses the site with ``viewSite()``.

    If ``myframe.stop_refresh_page_thread`` is already set on entry, no
    browser is started: the run counter is bumped via ``addrefreshnum()``,
    the UI is refreshed, and the function returns.

    Any exception raised while creating the browser or browsing is logged
    with its traceback, a screenshot is attempted when a browser exists,
    and ``myframe.stop_refresh_page_thread`` is set to True. The browser is
    always quit when one was created.
    """
    # a. A manual stop was requested: count the run, refresh the UI, and end.
    if myframe.stop_refresh_page_thread:
        addrefreshnum()
        myframe.refresh_run_stop_button()
        return

    # b. Normal run.
    refreshnum = myframe.refreshnum
    siteurl = myframe.siteurlinput.GetValue().strip()
    # NOTE(review): the log literals below are kept verbatim; the "(wǎng)"
    # fragment looks like a text-conversion artefact -- confirm against the
    # original source before changing them.
    my_logger_info(logger, "==開始網(wǎng)站%s第%d次刷量<直接訪問>==" % (siteurl, refreshnum + 1))

    # Bind the name before the try block so the except/finally clauses can
    # tell "browser never started" apart from "browser started and failed".
    driver = None
    try:
        driver = createWebDriver()
        driver.maximize_window()
        viewSite(driver, siteurl)
    except Exception:
        my_logger_info(logger, traceback.format_exc())
        if driver is not None:
            # The screenshot is best-effort: a failure here must not skip
            # setting the stop flag below.
            try:
                driver.save_screenshot(".//refreshpage_directvisit_error.png")
            except Exception:
                my_logger_info(logger, traceback.format_exc())
        myframe.stop_refresh_page_thread = True
    finally:
        # Close the browser only if one was actually created.
        if driver is not None:
            driver.quit()

    # Count the run, refresh the UI, and log completion.
    addrefreshnum()
    myframe.refresh_run_stop_button()
    my_logger_info(logger, "==完成網(wǎng)站%s第%d次刷量<直接訪問>==" % (siteurl, refreshnum + 1))

2）runjob=>createWebDriver()代碼如下
# Build the browser driver used for a visit.
def createWebDriver():
    """Create a Chrome WebDriver and return it.

    The driver is created from a ``webdriver.ChromeOptions`` object carrying
    the four command-line arguments listed below, with ``keep_alive=True``.
    Before returning, a script is registered through the CDP command
    ``Page.addScriptToEvaluateOnNewDocument`` that redefines
    ``navigator.webdriver`` so its getter yields ``undefined``.
    """
    # Script registered for every new document. The original comment states
    # the intent: keep the visit from being recognised as Selenium traffic.
    hide_webdriver_js = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""

    chrome_arguments = (
        'lang=zh_CN.UTF-8',                    # page language/encoding (per the original comment)
        'blink-settings=imagesEnabled=false',  # do not load images
        '--no-sandbox',                        # disable the sandbox
        'headless',                            # run without a visible window
    )

    options = webdriver.ChromeOptions()
    for argument in chrome_arguments:
        options.add_argument(argument)

    driver = webdriver.Chrome(options=options, keep_alive=True)
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": hide_webdriver_js},
    )
    return driver

3）runjob=>viewSite(driver,siteurl)代碼如下
# Simulate a visitor browsing the site.
def viewSite(driver,url):
    """Load *url* and walk through a fixed set of site columns.

    After loading the page and idling 3-5 seconds (via ``runIdleSomeTime``),
    the two product columns are visited with ``viewProductOfLanmu`` and then
    the three article columns with ``viewArticleOfLanmu``. Each loop stops
    early as soon as ``myframe.stop_refresh_page_thread`` is set.

    Args:
        driver: the browser driver to browse with.
        url: address of the site's entry page.
    """
    my_logger_info(logger, "<<開始訪問網(wǎng)站:%s" % url)
    driver.get(url)
    # The page source is fetched but not used afterwards; the fetch is kept
    # so the sequence of driver commands stays the same.
    pagesource = driver.page_source
    runIdleSomeTime(random.randint(3, 5))

    # Column link texts, matched against the page's link text by the helpers.
    # NOTE(review): the literals are kept verbatim; fragments such as "(guī)"
    # look like a text-conversion artefact -- confirm against the real site.
    article_columns = ["橋架國標", "橋架價格", "橋架安裝"]
    product_columns = ["橋架配件", "橋架規(guī)格"]

    for column in product_columns:
        # Honour a manual stop request.
        if myframe.stop_refresh_page_thread:
            break
        viewProductOfLanmu(driver, column)

    for column in article_columns:
        # Honour a manual stop request.
        if myframe.stop_refresh_page_thread:
            break
        viewArticleOfLanmu(driver, column)

    if myframe.stop_refresh_page_thread:
        my_logger_info(logger, "已經(jīng)停止刷量")
    my_logger_info(logger, ">>完成訪問網(wǎng)站:%s" % url)

4）runjob=>viewSite(driver,siteurl)=>viewProductOfLanmu(driver,lanmu)代碼如下
# Browse the products of one column.
def viewProductOfLanmu(driver,lanmu):
    """Open the column whose link text is *lanmu* and visit each product.

    For every page of the column, each ``div.mask`` inside ``div.list`` is
    inspected; the ``href`` of its first anchor is opened in a new window,
    left open for 5-7 seconds, then closed. Paging continues while the
    ``div.pageBox`` element contains a "next page" anchor. The function
    returns early whenever ``myframe.stop_refresh_page_thread`` is set.

    Args:
        driver: the browser driver, already showing a page that contains
            the column link.
        lanmu: link text of the column to open.

    Raises:
        Whatever ``driver.find_element`` raises when the column link itself
        cannot be found; that lookup is deliberately not guarded.
    """
    # Function-scope import keeps this block self-contained.
    # find_element(By.LINK_TEXT, ...) replaces find_element_by_link_text,
    # which current Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    column_link = driver.find_element(By.LINK_TEXT, lanmu)
    # Click through JavaScript rather than link.click() so an overlaying
    # chat/consulting widget cannot intercept the click.
    driver.execute_script("arguments[0].click();", column_link)
    # Wait for the first page of the column to load.
    runIdleSomeTime(random.randint(3, 5))
    if myframe.stop_refresh_page_thread:
        return

    # Remember the listing window explicitly instead of assuming it is the
    # first handle returned by window_handles.
    parent_handle = driver.current_window_handle
    soup = BeautifulSoup(driver.page_source, "html.parser")
    while True:
        # A page without the expected container simply has nothing to visit.
        list_div = soup.find("div", class_="list")
        product_divs = list_div.find_all("div", class_="mask") if list_div else []
        for product_div in product_divs:
            anchor = product_div.find("a")
            href = anchor.get("href") if anchor else None
            if not href:
                continue
            my_logger_info(logger, "訪問頁面：%s" % href)
            try:
                # Pass the URL as a script argument: it comes from page
                # content and must not be spliced into the JavaScript text.
                driver.execute_script("window.open(arguments[0],'_blank');", href)
            except Exception:
                my_logger_info(logger, traceback.format_exc())
                continue
            # Stay on the opened product page for a while.
            runIdleSomeTime(random.randint(5, 7))
            if myframe.stop_refresh_page_thread:
                break
            # Close only a window that is not the listing window; if nothing
            # new opened, closing the last handle would close the listing.
            opened = [h for h in driver.window_handles if h != parent_handle]
            if opened:
                driver.switch_to.window(opened[-1])
                driver.close()
            driver.switch_to.window(parent_handle)
            # Linger on the listing page before the next product.
            runIdleSomeTime(random.randint(1, 3))
            if myframe.stop_refresh_page_thread:
                break
        if myframe.stop_refresh_page_thread:
            break

        # Is there a next page? A missing pager counts as "no".
        # NOTE(review): the "下一頁" literal is kept verbatim from the source.
        pager = soup.find("div", class_="pageBox")
        if pager is None or not pager.find("a", text="下一頁"):
            break
        # Try to move to the next page; stop paging if that fails.
        try:
            next_link = driver.find_element(By.LINK_TEXT, "下一頁")
            driver.execute_script("arguments[0].click();", next_link)
        except Exception:
            my_logger_info(logger, traceback.format_exc())
            break
        # Wait for the new page of the column to load.
        runIdleSomeTime(random.randint(3, 5))
        soup = BeautifulSoup(driver.page_source, "html.parser")

5）runjob=>viewSite(driver,siteurl)=>viewArticleOfLanmu(driver,lanmu)代碼如下
# Browse the articles of one column.
def viewArticleOfLanmu(driver,lanmu):
    """Open the column whose link text is *lanmu* and visit each article.

    For every page of the column, the ``href`` of each ``a.look`` anchor
    inside ``div.newsList`` is opened in a new window, left open for 5-7
    seconds, then closed. Paging continues while the ``div.pageBox``
    element contains a "next page" anchor. The function returns early
    whenever ``myframe.stop_refresh_page_thread`` is set.

    Args:
        driver: the browser driver, already showing a page that contains
            the column link.
        lanmu: link text of the column to open.

    Raises:
        Whatever ``driver.find_element`` raises when the column link itself
        cannot be found; that lookup is deliberately not guarded.
    """
    # Function-scope import keeps this block self-contained.
    # find_element(By.LINK_TEXT, ...) replaces find_element_by_link_text,
    # which current Selenium releases no longer provide.
    from selenium.webdriver.common.by import By

    column_link = driver.find_element(By.LINK_TEXT, lanmu)
    # Click through JavaScript rather than link.click() so an overlaying
    # chat/consulting widget cannot intercept the click.
    driver.execute_script("arguments[0].click();", column_link)
    # Wait for the first page of the column to load.
    runIdleSomeTime(random.randint(3, 5))
    if myframe.stop_refresh_page_thread:
        return

    # Remember the listing window explicitly instead of assuming it is the
    # first handle returned by window_handles.
    parent_handle = driver.current_window_handle
    soup = BeautifulSoup(driver.page_source, "html.parser")
    while True:
        # A page without the expected container simply has nothing to visit.
        news_div = soup.find("div", class_="newsList")
        article_links = news_div.find_all("a", class_="look") if news_div else []
        for anchor in article_links:
            href = anchor.get("href")
            if not href:
                continue
            my_logger_info(logger, "訪問頁面：%s" % href)
            try:
                # Pass the URL as a script argument: it comes from page
                # content and must not be spliced into the JavaScript text.
                driver.execute_script("window.open(arguments[0],'_blank');", href)
            except Exception:
                # Same policy as viewProductOfLanmu: log and move on.
                my_logger_info(logger, traceback.format_exc())
                continue
            # Stay on the opened article for a while.
            runIdleSomeTime(random.randint(5, 7))
            if myframe.stop_refresh_page_thread:
                break
            # Close only a window that is not the listing window; if nothing
            # new opened, closing the last handle would close the listing.
            opened = [h for h in driver.window_handles if h != parent_handle]
            if opened:
                driver.switch_to.window(opened[-1])
                driver.close()
            driver.switch_to.window(parent_handle)
            # Linger on the listing page before the next article.
            runIdleSomeTime(random.randint(5, 7))
            if myframe.stop_refresh_page_thread:
                break
        if myframe.stop_refresh_page_thread:
            break

        # Is there a next page? A missing pager counts as "no".
        # NOTE(review): the "下一頁" literal is kept verbatim from the source.
        pager = soup.find("div", class_="pageBox")
        if pager is None or not pager.find("a", text="下一頁"):
            break
        # Try to move to the next page; stop paging if that fails.
        try:
            next_link = driver.find_element(By.LINK_TEXT, "下一頁")
            driver.execute_script("arguments[0].click();", next_link)
        except Exception:
            my_logger_info(logger, traceback.format_exc())
            break
        # Wait for the new page of the column to load.
        runIdleSomeTime(random.randint(3, 5))
        soup = BeautifulSoup(driver.page_source, "html.parser")

3、爬蟲訪問示例網站：

首頁鏈接：

http://www.jywy.bj.cn/

欄目鏈接：

http://www.jywy.bj.cn/index.php?s=/List/index/cid/23.html

文章鏈接：

http://www.jywy.bj.cn/index.php?s=/Show/index/cid/23/id/167.html

http://www.jywy.bj.cn/index.php?s=/Show/index/cid/23/id/168.html

歡迎有興趣的同學留言交流。

關鍵詞：內容,瀏覽,模擬,爬蟲

解決方案&服務

客戶&案例

營銷資訊

關于我們

微信公眾號

版權所有 © 億企邦 1997-2025 保留一切法律許可權利。

為了最佳展示效果，本站不支持IE9及以下版本的瀏覽器，建議您使用谷歌Chrome瀏覽器。點擊下載Chrome瀏覽器

關閉

国产成人精品无码青草_亚洲国产美女精品久久久久∴_欧美人与鲁交大毛片免费_国产果冻豆传媒麻婆精东

快捷入口

python selenium 爬蟲模擬瀏覽網站內容

什么樣的域名利于網站SEO優化？

怎么創建一個表白網站！網站！網站？

最好的 Python 網站開發方面的學習教程有哪些？

廣告公司一般在哪些網站找設計素材？

中國著名的高端私人旅游定制機構有哪些呢？

完美的網站SEO優化計劃方案【簡單seo】

有什么運營網站推薦？

網站建設制作-建企業網站公司

杭州APP開發公司外包價格

為什么要搭建電商網站？有哪些好處

国产成人精品无码青草_亚洲国产美女精品久久久久∴_欧美人与鲁交大毛片免费_国产果冻豆传媒麻婆精东

快捷入口

python selenium 爬蟲 模擬瀏覽網站內容

推薦文章