본문 바로가기
Computer Science

dart

by OKOK 2020. 5. 3.

import requests
import pandas as pd
from bs4 import BeautifulSoup
import urllib3
import urlopen
import time
import pycurl
from io import BytesIO
import parser


# urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# API key sent as the `crtfc_key` query parameter of the opendart.fss.or.kr
# request made in load_data.
# NOTE(review): this is a credential committed in source; consider loading it
# from the environment or a config file that is not checked in.
crtfc_key = "8789bbb48450dbd53e3aac6fcec93c34"
# Company code sent as the `corp_code` query parameter.
corp_code = "00140131" # Kidari Studio
# Sent as the `bgn_de` query parameter, in YYYYMMDD form.
start_date = "20200420"
# http = urllib3.PoolManager()

def load_data(corp_code):
    """Fetch the filing list for one company from the Open DART list API.

    The request carries the module-level ``crtfc_key`` and ``start_date``
    together with the given company code.

    Args:
        corp_code: Company code sent as the ``corp_code`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    # Passing the query as a dict lets requests URL-encode every value
    # instead of relying on hand-built string concatenation.
    query = {
        "crtfc_key": crtfc_key,
        "corp_code": corp_code,
        "bgn_de": start_date,
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(
        "https://opendart.fss.or.kr/api/list.json",
        params=query,
        timeout=10,
    )
    # Fail with a clear HTTP error rather than an obscure JSON decoding error.
    response.raise_for_status()
    # The endpoint used here is the JSON variant (the original note suggested
    # comparing it with the XML one).
    return response.json()


def print_data(data):
    """Print each filing of a list response together with its profit rows.

    For every entry in ``data['list']`` the company name, the report name and
    the viewer link are printed. The report page is then downloaded, the
    viewer URL of its summary-financial-information section is derived from
    the page ``<head>``, and the operating-profit / net-income rows of the
    first table after the consolidated-statements marker are printed.

    Args:
        data: Mapping with a ``'list'`` key whose items are dicts providing
            ``'corp_name'``, ``'report_nm'`` and ``'rcept_no'``.

    Raises:
        IndexError: If a downloaded page lacks one of the text markers or
            tables that the scraper splits and indexes on.
    """
    # NOTE(review): the indentation of this function was lost in the source;
    # the scraping steps are assumed to run once per filing -- confirm.
    # NOTE(review): a commented-out print of the filing's 'corp_cls' carried
    # the remark "only Y and K are accepted"; no such filter is applied here.

    # One connection pool is enough for every request made below.
    http = urllib3.PoolManager()

    for filing in data['list']:
        print(filing['corp_name'])
        print(filing['report_nm'])
        link = "http://dart.fss.or.kr/dsaf001/main.do?rcpNo=" + filing['rcept_no']

        # NOTE(review): this fixed URL replaces the per-filing link built
        # above, so every iteration scrapes the same report. It looks like a
        # debugging leftover; it is kept so the printed output is unchanged.
        link = "http://dart.fss.or.kr/dsaf001/main.do?rcpNo=20200421000481&dcmNo=7309797&keyword=%EB%B6%84%EA%B8%B0%EB%B3%B4%EA%B3%A0%EC%84%9C#t2_13"
        print(link)

        res = http.request('GET', link, timeout=3)
        head = BeautifulSoup(res.data, 'lxml').find('head')
        viewer_url = _summary_viewer_url(str(head))

        res = http.request('GET', viewer_url, timeout=3)
        page = str(BeautifulSoup(res.data, 'lxml'))
        # Keep only the markup that follows the first occurrence of the
        # "consolidated financial statements" marker text.
        section = BeautifulSoup(page.split('연결재무제표')[1], 'html.parser')
        tables = section.find_all('table')
        print(tables)

        print(_profit_rows(tables[0]))


def _summary_viewer_url(head_html):
    """Build the viewer.do URL from the viewDoc(...) call found in *head_html*."""
    # Take the argument list of the first viewDoc(...) call that follows the
    # "summary financial information" label and precedes the next 'cnt++'.
    raw_args = (
        head_html.split('요약재무정보",')[1]
        .split('cnt++')[0]
        .split('viewDoc(')[1]
        .split(')')[0]
        .split(', ')
    )
    # Each argument loses its first and last character (presumably the
    # surrounding quotes -- confirm against a live page).
    args = [arg[1:-1] for arg in raw_args]
    return (
        'http://dart.fss.or.kr/report/viewer.do?rcpNo=' + args[0]
        + '&dcmNo=' + args[1]
        + '&eleId=' + args[2]
        + '&offset=' + args[3]
        + '&length=' + args[4]
        + '&dtd=dart3.xsd'
    )


def _profit_rows(table_tag):
    """Return the operating-profit and net-income rows of *table_tag* as int64."""
    # NOTE(review): `make2d` is not provided by the standard-library `parser`
    # module that `import parser` would normally resolve to; presumably a
    # table-parsing helper module (e.g. html_table_parser) is expected --
    # confirm the import at the top of the file.
    rows = parser.make2d(table_tag)
    frame = pd.DataFrame(rows[1:], columns=rows[0])

    # Keep rows whose first cell mentions operating profit or net income.
    wanted = frame.iloc[:, 0].apply(
        lambda label: '영업이익' in label or '당기순이익' in label)
    frame = frame[wanted]

    # Use the label column as the index, then drop it from the data columns.
    frame.index = frame.iloc[:, 0]
    del frame[frame.columns[0]]

    # Blank out empty cells and drop every column that contains one.
    frame = frame[frame.applymap(lambda cell: len(cell) > 0)]
    frame = frame.dropna(axis=1)

    # Strip thousands separators and turn "(123)" into "-123".
    for column in frame.columns:
        frame[column] = frame[column].apply(
            lambda cell: cell.replace(',', '').replace('(', '-').replace(')', ''))

    # The dtype string avoids the `np` name, which the file never imports.
    return frame.astype('int64')


# Fetch the filing list for the configured company and print how long the
# call to load_data took, in seconds.
start = time.time()
data = load_data(corp_code)
print("repeat time :", time.time() - start)

# Print every returned filing and the table scraped for it.
print_data(data)

'Computer Science' 카테고리의 다른 글

azure response fast  (0) 2020.08.11
azure slow response  (0) 2020.08.11
파이썬 키움 api 사용  (0) 2020.03.25
8 Problem A : Bit_ImageMap1  (0) 2020.02.10
5 Problem C : 루빅의 사각형  (0) 2020.02.04

댓글