데이터 가공
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
import os

from selenium import webdriver

# Scrape the description text of a YouTube video and save it, together with
# a screenshot of the page, into a per-video directory ("<n>번째 기사").

# Location of the local ChromeDriver binary.
CHROMEDRIVER_PATH = 'C:/Users/OHG/Downloads/chromedriver_win32/chromedriver'

# Number of trailing characters cut from each description element.
# NOTE(review): presumably a fixed-length footer shared by every description
# in this playlist -- confirm; shorter texts are written as an empty string.
TRAILING_CHARS_TO_DROP = 117

url = "https://www.youtube.com/watch?v=94YwFIJ-yR0&list=PL3Eb1N33oAXijqFKrO83hDEN0HPwaecV3&index=1"

browser = webdriver.Chrome(CHROMEDRIVER_PATH)
try:
    browser.implicitly_wait(5)
    browser.get(url)

    for a in range(1, 2):
        article_dir = str(a) + "번째 기사"
        # exist_ok lets the script be re-run without crashing on the
        # directory created by a previous run.
        os.makedirs(article_dir, exist_ok=True)
        print(a, "번째 url open")

        products = browser.find_elements_by_css_selector('#description > yt-formatted-string')

        # Write through explicit paths instead of os.chdir() so a failure
        # cannot leave the process in the wrong working directory. The
        # context manager guarantees the file is flushed and closed, and the
        # explicit encoding avoids depending on the Windows locale default.
        script_path = os.path.join(article_dir, "기사 대본.txt")
        with open(script_path, 'w', encoding='utf-8') as f:
            for product in products:
                f.write(product.text[:-TRAILING_CHARS_TO_DROP])

        browser.save_screenshot(os.path.join(article_dir, "Website.png"))
finally:
    # Always shut the browser down, even when scraping fails midway, so no
    # orphaned Chrome/ChromeDriver process is left behind.
    browser.quit()
|
Directory를 만들고 거기에 txt 파일로 내용을 저장하는 데 성공했다.
하지만 다음 영상을 가져오는 방법을 만들어야 한다.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
import os
import time

from selenium import webdriver

# Walk through a YouTube playlist: for each video, save the description text
# and a screenshot into a per-video directory ("<n>번째 기사"), then click the
# player's "next" button to move on to the following video.

# Location of the local ChromeDriver binary.
CHROMEDRIVER_PATH = 'C:/Users/OHG/Downloads/chromedriver_win32/chromedriver'

# Number of trailing characters cut from each description element.
# NOTE(review): presumably a fixed-length footer shared by every description
# in this playlist -- confirm; shorter texts are written as an empty string.
TRAILING_CHARS_TO_DROP = 117

# CSS selector of the "next video" button inside the YouTube player controls.
NEXT_BUTTON_SELECTOR = '#movie_player > div.ytp-chrome-bottom > div.ytp-chrome-controls > div.ytp-left-controls > a.ytp-next-button.ytp-button'

url = "https://www.youtube.com/watch?v=94YwFIJ-yR0&list=PL3Eb1N33oAXijqFKrO83hDEN0HPwaecV3&index=1"

browser = webdriver.Chrome(CHROMEDRIVER_PATH)
try:
    browser.implicitly_wait(5)
    browser.get(url)

    for a in range(1, 3):
        article_dir = str(a) + "번째 기사"
        # exist_ok lets the script be re-run without crashing on the
        # directories created by a previous run.
        os.makedirs(article_dir, exist_ok=True)
        print(a, "번째 url open")

        products = browser.find_elements_by_css_selector('#description > yt-formatted-string')

        # Write through explicit paths instead of os.chdir() so a failure
        # cannot leave the process in the wrong working directory. The
        # context manager guarantees the file is flushed and closed, and the
        # explicit encoding avoids depending on the Windows locale default.
        script_path = os.path.join(article_dir, "기사 대본.txt")
        with open(script_path, 'w', encoding='utf-8') as f:
            for product in products:
                f.write(product.text[:-TRAILING_CHARS_TO_DROP])

        browser.save_screenshot(os.path.join(article_dir, "Website.png"))

        # Advance to the next playlist entry and give the page a moment to
        # load before the next iteration reads the description.
        browser.find_element_by_css_selector(NEXT_BUTTON_SELECTOR).click()
        time.sleep(3)
finally:
    # Always shut the browser down, even when scraping fails midway, so no
    # orphaned Chrome/ChromeDriver process is left behind.
    browser.quit()
|
CSS 선택자를 통해 click() 함수로 해결했다.
음성 추출
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
import os
import time

import pytube
from selenium import webdriver

# Walk through a YouTube playlist: for each video, save the description text,
# download the video with pytube, and take a screenshot -- all into a
# per-video directory ("<n>번째 기사") -- then click the player's "next"
# button to move on to the following video.

# Location of the local ChromeDriver binary.
CHROMEDRIVER_PATH = 'C:/Users/OHG/Downloads/chromedriver_win32/chromedriver'

# Number of trailing characters cut from each description element.
# NOTE(review): presumably a fixed-length footer shared by every description
# in this playlist -- confirm; shorter texts are written as an empty string.
TRAILING_CHARS_TO_DROP = 117

# CSS selector of the "next video" button inside the YouTube player controls.
NEXT_BUTTON_SELECTOR = '#movie_player > div.ytp-chrome-bottom > div.ytp-chrome-controls > div.ytp-left-controls > a.ytp-next-button.ytp-button'

url = "https://www.youtube.com/watch?v=94YwFIJ-yR0&list=PL3Eb1N33oAXijqFKrO83hDEN0HPwaecV3&index=1"

browser = webdriver.Chrome(CHROMEDRIVER_PATH)
try:
    browser.implicitly_wait(5)
    browser.get(url)

    for a in range(1, 3):
        article_dir = str(a) + "번째 기사"
        # exist_ok lets the script be re-run without crashing on the
        # directories created by a previous run.
        os.makedirs(article_dir, exist_ok=True)
        print(a, "번째 url open")

        products = browser.find_elements_by_css_selector('#description > yt-formatted-string')

        # Write through explicit paths instead of os.chdir() so a failure
        # (e.g. a pytube download error) cannot leave the process in the
        # wrong working directory. The context manager guarantees the file is
        # flushed and closed, and the explicit encoding avoids depending on
        # the Windows locale default.
        script_path = os.path.join(article_dir, "기사 대본.txt")
        with open(script_path, 'w', encoding='utf-8') as f:
            for product in products:
                f.write(product.text[:-TRAILING_CHARS_TO_DROP])

        # Download the video currently shown in the browser. streams.first()
        # takes whichever stream pytube lists first.
        video = pytube.YouTube(browser.current_url)
        stream = video.streams.first()
        stream.download(output_path=article_dir)

        browser.save_screenshot(os.path.join(article_dir, "Website.png"))

        # Advance to the next playlist entry and give the page a moment to
        # load before the next iteration reads the description.
        browser.find_element_by_css_selector(NEXT_BUTTON_SELECTOR).click()
        time.sleep(3)
finally:
    # Always shut the browser down, even when scraping or downloading fails
    # midway, so no orphaned Chrome/ChromeDriver process is left behind.
    browser.quit()
|
동영상 추출 성공!