import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import urllib3
import time
# make2d() used below; assumed to come from the html-table-parser-python3 package
from html_table_parser import parser_functions as parser
# urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
crtfc_key = "8789bbb48450dbd53e3aac6fcec93c34"
corp_code = "00140131" #키다리스튜디오
start_date = "20200420"
# http = urllib3.PoolManager()
def load_data(corp_code):
    url = "https://opendart.fss.or.kr/api/list.json?crtfc_key=" + crtfc_key \
          + "&corp_code=" + corp_code + "&bgn_de=" + start_date
    # url = "https://opendart.fss.or.kr/api/list.json?crtfc_key=" + crtfc_key + "&bgn_de=" + start_date + "&page_no=1&page_count=10"
    r = requests.get(url)
    company_data = r.json()  # compare with the xml endpoint
    return company_data
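# Illustrative sketch (not part of the original post): the commented-out corp_cls check in
# print_data() below suggests only KOSPI ('Y') and KOSDAQ ('K') filings are of interest, so a
# helper like this could pre-filter the list. The corp_cls field name comes from the script
# itself; the helper name is made up for this example.
def filter_listed(data):
    return [item for item in data.get('list', []) if item.get('corp_cls') in ('Y', 'K')]
# e.g. print_data({'list': filter_listed(data)}) would print only listed-company reports.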
def print_data(data):
    # print(data)
    # print(data.keys())
    # print(type(data['list']))
    for i in data['list']:
        # print(i['corp_cls'])  # only 'Y' (KOSPI) and 'K' (KOSDAQ) are of interest
        print(i['corp_name'])
        print(i['report_nm'])
        # print(i['rcept_no'])
        link = "http://dart.fss.or.kr/dsaf001/main.do?rcpNo=" + i['rcept_no']
        # hard-coded sample report left over from debugging; it would override the link built above
        # link = "http://dart.fss.or.kr/dsaf001/main.do?rcpNo=20200421000481&dcmNo=7309797&keyword=%EB%B6%84%EA%B8%B0%EB%B3%B4%EA%B3%A0%EC%84%9C#t2_13"
        print(link)
        http = urllib3.PoolManager()
        res = http.request('GET', link, timeout=3)
        soup = BeautifulSoup(res.data, 'lxml')
        # print(soup)
        # pull the viewDoc(...) arguments for the '요약재무정보' (summary financial information) menu entry
        body = str(soup.find('head')).split('요약재무정보",')[1].split('cnt++')[0].split('viewDoc(')[1].split(')')[0].split(', ')
        body = [body[i][1:-1] for i in range(len(body))]
        url_final = 'http://dart.fss.or.kr/report/viewer.do?rcpNo=' + body[0] \
                    + '&dcmNo=' + body[1] + '&eleId=' + body[2] + '&offset=' + body[3] \
                    + '&length=' + body[4] + '&dtd=dart3.xsd'
        #########################################################################
        res = http.request('GET', url_final, timeout=3)
        soup = BeautifulSoup(res.data, 'lxml')
        # take the part after the '연결재무제표' (consolidated financial statements) heading
        body = str(soup).split('연결재무제표')[1]
        body = BeautifulSoup(body, 'html.parser')
        table = body.find_all('table')
        print(table)
        p = parser.make2d(table[0])
        table = pd.DataFrame(p[1:], columns=p[0])
        # keep only the '영업이익' (operating profit) and '당기순이익' (net income) rows
        table['bool'] = table.iloc[:, 0].apply(lambda x: '영업이익' in x or '당기순이익' in x)
        table = table[table['bool'] == True]
        table.index = table.iloc[:, 0]
        del table[table.columns[0]], table['bool']
        table = table[table.applymap(lambda x: len(x) > 0) == True]
        table = table.dropna(axis=1)
        for col in table.columns:
            # strip thousands separators and turn accounting-style (parentheses) negatives into minus signs
            table[col] = table[col].apply(lambda x: x.replace(',', '').replace('(', '-').replace(')', ''))
        table = table.astype(dtype=np.int64)
        print(table)
        # table = bs.find_all('xforms')
        # print(table)
        # r2 = requests.get(link)
        # link_data = r2.json()
        # print(link_data)
        # print the desired information
        # http = urllib3.PoolManager()
        # res = http.request('GET', link, timeout=3)
        # bs = BeautifulSoup(res.data, features="lxml")
        # list_tag = bs.select("body > div > div > div > div > span")
        # print(list_tag)
        # for i in list_tag:
        #     print(i)
        # print(bs)
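# Hedged sketch (not in the original script): the commented-out URL in load_data() shows the
# list.json endpoint also accepts page_no and page_count, so fetching every page could look
# roughly like this. The total_page field is assumed from the OpenDART response format and
# should be verified against the API docs; the function name is made up for this example.
def load_all_pages(corp_code, page_count=100):
    reports = []
    page_no = 1
    while True:
        url = ("https://opendart.fss.or.kr/api/list.json?crtfc_key=" + crtfc_key
               + "&corp_code=" + corp_code + "&bgn_de=" + start_date
               + "&page_no=" + str(page_no) + "&page_count=" + str(page_count))
        page = requests.get(url).json()
        reports.extend(page.get('list', []))
        if page_no >= int(page.get('total_page', 1)):
            break
        page_no += 1
    return reports
# e.g. all_reports = load_all_pages(corp_code)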
start = time.time()
data = load_data(corp_code)
print("elapsed time :", time.time() - start)
print_data(data)