文獻批量下載器PyCNKi使用教程
PyCNKi下載器使用教程
PyCNKi下載器源碼
(百度鏈接里有.ipynb格式源碼)
一、導入庫
import re
import time
import urllib.error

import openpyxl
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ChromeOptions
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
二、打開知網并進行初始設置
# Run the browser without a visible window.
def wu_visual():
    """Return Chrome ``Options`` configured for headless operation.

    The returned object carries the ``--headless`` and ``--disable-gpu``
    command-line arguments.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    return chrome_options
def fan_jiance():
    """Return ``ChromeOptions`` with the ``enable-automation`` switch excluded.

    Excluding that switch is the usual way to make an automated Chrome
    session look less like one to the visited site.
    """
    option = ChromeOptions()
    option.add_experimental_option('excludeSwitches', ['enable-automation'])
    # Optional: add the '-kiosk' argument here to start Chrome in kiosk mode.
    return option


def url_error_test(url, bro):
    """Open *url* with *bro* and report urllib-level failures.

    Args:
        url: Address to load.
        bro: Any object exposing ``get(url)``.

    Returns:
        ``None`` when the page loads (after printing ``OK``) or when an
        ``HTTPError`` is caught (its code and reason are printed); the
        error's ``reason`` when a plain ``URLError`` is caught.
    """
    # NOTE(review): this only catches urllib exceptions; confirm that the
    # driver passed as `bro` can actually raise them.
    try:
        bro.get(url)
        print("OK")
    except urllib.error.HTTPError as e:
        # HTTPError subclasses URLError, so it must be handled first.
        print(e.code)
        print(e.reason)
    except urllib.error.URLError as e:
        print(e.reason)
        return e.reason
# Build both option sets; later parts of the script reuse these names.
chrome_options = wu_visual()
option = fan_jiance()
chrome_path = r'./chromedriver.exe'

# Selenium 3 treats `chrome_options=` as a deprecated alias that overrides
# `options=`, so passing both drops the anti-detection settings. Copy the
# headless arguments into `option` and pass one options object instead.
for argument in chrome_options.arguments:
    if argument not in option.arguments:
        option.add_argument(argument)
bro = webdriver.Chrome(executable_path=chrome_path, options=option)

# Firefox users: uncomment the next line to use Firefox instead.
# bro = webdriver.Firefox()

bro.maximize_window()  # maximise the browser window
url = r'http://kns.cnki.net'  # CNKI search site
bro.get(url)
三、關鍵詞搜索
# Simulate typing a keyword query.
# Pick whichever search mode you need; this script only implements title search.
input_title = bro.find_element_by_id("txt_SearchText")
input_title.click()
time.sleep(2)
key_value = input("請輸入你要下載的論文標題:")
input_title.send_keys(key_value)

# Click the search button.
div_search = bro.find_element_by_xpath('/html/body/div[1]/div[2]/div/div[1]/input[2]')
div_search.click()
time.sleep(1)

# Click the journal-article tab.
# default_1 is the result count above which the script treats the listing as
# multi-page (presumably the number of results per page - confirm).
default_1 = 20
bro.find_element_by_xpath("/html/body/div[5]/div[1]/div/ul[1]/li[1]/a/span").click()
time.sleep(10)
total_num = bro.find_element_by_xpath("/html/body/div[5]/div[1]/div/ul[1]/li[1]/a/em")

if int(total_num.text) <= default_1:
    print("一共搜索到" + total_num.text + "條結果")
    print("共一頁")
    # Define `num` here too: the page-number validation later in the script
    # reads it, and it used to be undefined on this branch.
    num = 1
else:
    print("一共搜索到" + total_num.text + "條結果")
    total_page = bro.find_element_by_xpath('//*[@id="gridTable"]/div[2]/span[1]')
    print(total_page.text)
    # Drop the first and last character around the page count
    # (presumably a "共N頁"-style label - confirm against the live page).
    num = int(total_page.text[1:-1])
四、選擇下載格式及批量下載到幾頁
# Ask which file format to download: 1 selects PDF, 2 selects CAJ.
print("1:PDF格式2:CAJ格式請輸入下載文件的格式對應數字:")
load_num = int(input("請輸入1 or 2:"))
# Any other number would make the download step silently do nothing,
# so keep asking until the answer is one of the two supported choices.
while load_num not in (1, 2):
    load_num = int(input("請輸入1 or 2:"))

# Prompt for the last result page to download; the answer is read by the
# statement that follows this block.
print("請輸入您要下載到第幾頁碼:")
五、開始批量下載
# XPath of the paper title on a detail page.
TITLE_XPATH = "/html/body/div[2]/div[1]/div[3]/div/div[1]/div[3]/div[1]/h1"
# XPath of the title links in the result table.
RESULT_LINKS_XPATH = '//*[@id="gridTable"]/table/tbody/tr/td[2]/a'
# Element id of the download button for each format choice (1 = PDF, 2 = CAJ).
DOWNLOAD_BUTTON_IDS = {1: 'pdfDown', 2: 'cajDown'}
# Text shown by the site when a detail link is rejected ("URL parameter error").
# NOTE(review): restored to Simplified Chinese on the assumption that the site
# serves Simplified text - confirm against the live error page.
URL_ERROR_TEXT = "URL参数错误"


def _open_detail_browser():
    """Start a Chrome instance carrying both the headless and anti-detection options."""
    # Selenium 3 lets `chrome_options=` override `options=`, so merge the
    # headless arguments into `option` and pass a single options object.
    for argument in chrome_options.arguments:
        if argument not in option.arguments:
            option.add_argument(argument)
    return webdriver.Chrome(executable_path=chrome_path, options=option)


def _paper_title(browser, fallback):
    """Return the detail page's title, or *fallback* when the heading is absent."""
    try:
        return browser.find_element_by_xpath(TITLE_XPATH).text
    except NoSuchElementException:
        return fallback


def _download_paper(link, count):
    """Open one detail page, click the chosen download button, and report the result."""
    bro_new = _open_detail_browser()
    try:
        bro_new.get(link)
        bro_new.maximize_window()
        time.sleep(10)  # give the detail page time to load
        if bro_new.find_element_by_xpath('/html/body/div[2]/div').text == URL_ERROR_TEXT:
            print("編號為" + str(count) + "的論文:" + _paper_title(bro_new, link) + "————論文下載失敗")
            return
        button_id = DOWNLOAD_BUTTON_IDS.get(load_num)
        if button_id is None:
            return  # unsupported format choice: nothing to click
        bro_new.find_element_by_id(button_id).click()
        time.sleep(10)  # give the download time to start before closing
        print("編號為" + str(count) + "的論文:" + _paper_title(bro_new, link) + "————下載成功")
    finally:
        # Always close the per-paper browser, even if a lookup above raises.
        bro_new.quit()


def _download_result_page():
    """Download every paper listed on the result page currently shown in `bro`."""
    url_link = bro.find_elements_by_xpath(RESULT_LINKS_XPATH)
    # Papers are numbered from 1 on each result page.
    for count, link_1 in enumerate(url_link, start=1):
        # NOTE(review): [20:] drops the first 20 characters of the href,
        # presumably a fixed prefix before the query string - confirm.
        link = url + r'/kcms/detail/detail.aspx?' + link_1.get_attribute("href")[20:]
        _download_paper(link, count)


single_page = int(total_num.text) <= default_1
# `num` is only meaningful for multi-page results; a single page caps at 1.
max_page = 1 if single_page else num

load_page = int(input())
while load_page > max_page or load_page <= 0:
    print("輸入頁碼錯誤,請重新輸入:")
    load_page = int(input())

if single_page:
    _download_result_page()
else:
    for page_index in range(load_page):
        _download_result_page()
        # Only advance when another requested page remains; the last result
        # page may have no "next" control.
        if page_index < load_page - 1:
            bro.find_element_by_xpath('//*[@id="PageNext"]').click()
            time.sleep(10)

發表評論
請輸入評論內容...
請輸入評論/評論長度6~500個字
圖片新聞
-
機器人奧運會戰報:宇樹機器人摘下首金,天工Ultra搶走首位“百米飛人”
-
存儲圈掐架!江波龍起訴佰維,索賠121萬
-
長安汽車母公司突然更名:從“中國長安”到“辰致科技”
-
豆包前負責人喬木出軌BP后續:均被辭退
-
字節AI Lab負責人李航卸任后返聘,Seed進入調整期
-
員工持股爆雷?廣汽埃安緊急回應
-
中國“智造”背后的「關鍵力量」
-
小米汽車研發中心重磅落地,寶馬家門口“搶人”
最新活動更多
-
10月23日火熱報名中>> 2025是德科技創新技術峰會
-
10月23日立即報名>> Works With 開發者大會深圳站
-
10月24日立即參評>> 【評選】維科杯·OFweek 2025(第十屆)物聯網行業年度評選
-
11月27日立即報名>> 【工程師系列】汽車電子技術在線大會
-
12月18日立即報名>> 【線下會議】OFweek 2025(第十屆)物聯網產業大會
-
精彩回顧立即查看>> 【限時福利】TE 2025國際物聯網展·深圳站
推薦專題
- 1 人形機器人,正狂奔在批量交付的曠野
- 2 宇樹機器人撞人事件的深度剖析:六維力傳感器如何成為人機安全的關鍵屏障
- 3 解碼特斯拉新AI芯片戰略 :從Dojo到AI5和AI6推理引擎
- 4 AI版“四萬億刺激”計劃來了
- 5 2025年8月人工智能投融資觀察
- 6 7 a16z最新AI百強榜:硅谷頂級VC帶你讀懂全球生成式AI賽道最新趨勢
- 8 Manus跑路,大廠掉線,只能靠DeepSeek了
- 9 一家被嚴重低估的國產AI巨頭
- 10 地平線的野心:1000萬套HSD上車