«
»
№ 29 (158), 2020 .
. ,
PYTHON
(
)
. . , . ,
. Python.
:
,
,
,
web-
« . . «
»
,
»
),
(DOM-
,
.
,
-
),
URL
-
,
. BeautifulSoup HTML XML.
-
. ,
. ,
web-
web-
.
–
,
-
, python. -
.
,
HTML
Python
, : request; BeautifulSoup. requests
( -
,
, GET
-
,
.
-
, HTML,
-
.
-
,
. BeautifulSoup, . .
,
-
Python, .
<p><p>. -
, ,
-
HTTP-
. .
.
-
:
url = "" headers = {'user_agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; Touch)', 'accept': '*/*'} path = "" content_tags = ["p"] wrap = 80 url - URL – headers – ; path – ;
:
;
content_tag – html wrap – .
,
def get_html(url, params=None): # return html code of url r = requests.get(url, headers=HEADERS, params=params) return r 9
; , :
-