관리 메뉴

🦕 공룡이 되자!

2BPerfect...12 본문

Development/Python

2BPerfect...12

Kirok Kim 2022. 1. 27. 11:39
์›น ์Šคํฌ๋ž˜ํ•‘
#pandas ๋‚ด๋ถ€์ ์œผ๋กœ matplotlib์„ ์‚ฌ์šฉํ•˜๊ธฐ ๋•Œ๋ฌธ์—
# ๊ฐ„๋‹จํ•œ ๊ทธ๋ž˜ํ”„๋ผ๋ฉด pandas๋งŒ import ์‹œ์ผœ๋„ ๋จ!
# ์›น์‚ฌ์ดํŠธ
## ํ•˜๋‚˜
import webbrowser
url=naver~
webbrowser.open(url)
naver_search_url='~/search.naver?query='
search_word='파이썬'
url=nsu+sw

webbrowser.open_new(url)

##여러개
urls=['네이버','구글'~]
for url in urls:
	webbrowser.open_new(url)

search_words=['파이썬','','']

for search_word in sws:
	webbrowser.open_new(url+search_word)

# 웹페이지 소스 가져오기
import requests

r=requests.get('')
r
r.text[0:100] ์ด๊ฒƒ์œผ๋กœ ๊ฐ€์ ธ์˜จhtml ์†Œ์Šค๋ฅผ ํ™•์ธ

html=r=requests.get('').text
## 소스를 좀 더 보기 쉽게 하는 것 Parser Beautifulsoup
# html 분석 파싱(parsing) 원하는 태그를 검색 추출
print(soup.prettify() ) # 인터넷에서 보던 트리형식으로 출력

from bs4 import BeautifulSoup

bs=BeautifulSoup(req.text,'html.parser')
bs.find('').get_text() #''태그 text 추출
#bs.find('태그','속성')=단순 태그만 검색, 복잡한 구조 검색 불가능

#bs.find_all('태그','속성')= 단순 태그만 검색 복잡한 구조 검색 불가능
sn=bs.find_all('')
for s in sn:
	print(s.get_text())
bs.title
bs.body
bs.body.h1

bts=bs.find_all('p',{'id':'book_title'})
aus=bs.find_all('p',{'id':'author'})

for bt,au in zip(bts,aus):
	print(bt.get_text()+'/'+au.get_text())

'''
์ฃผ์š” css5๊ฐ€์ง€
#id id='id๊ฐ’'
.class class='class๊ฐ’' .cls1.cls2 ํด๋ž˜์Šค๊ฐ’์—ฌ๋Ÿฌ๊ฐœ ๊ฐ€์ง€๊ณ  ์žˆ๋Š” ์„ ํƒ์ž class='cls1 cls2'
tag <tag>
์„ ํƒ์ž1> ์„ ํƒ์ž2 ์ž์‹ ์„ ํƒ์ž
์„ ํƒ์ž1 ์„ ํƒ์ž2 ์ž์† ์„ ํƒ์ž
'''
bs.select('css์„ ํƒ์ž') select๋Š” ๋‹ค์ค‘ ์„ ํƒ
bs.select_one('category') select_one ํ•˜๋‚˜๋งŒ ์„ ํƒ

bs.select('body p')
bs.select('p#book_title')

반응형

'Development > Python' 카테고리의 다른 글

2BPerfect...14  (0) 2022.01.27
2BPerfect...13  (0) 2022.01.27
2BPerfect...11  (0) 2022.01.26
2BPerfect...10  (0) 2022.01.26
2BPerfect...9  (0) 2022.01.25
Comments