import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# 노트북 안에 그래프를 그리기 위해
%matplotlib inline

# 그래프에서 격자로 숫자 범위가 눈에 잘 띄도록 ggplot 스타일을 사용
plt.style.use('ggplot')

# 그래프에서 마이너스 폰트 깨지는 문제에 대한 대처
mpl.rcParams['axes.unicode_minus'] = False

!apt -qq -y install fonts-nanum

# 한글 깨짐 문제 해결  
import matplotlib.font_manager as fm
fontpath = '/usr/share/fonts/truetype/nanum/NanumBarunGothic.ttf'
font = fm.FontProperties(fname=fontpath, size=9)
plt.rc('font', family='NanumBarunGothic') 
mpl.font_manager._rebuild()

/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm

The following NEW packages will be installed:
  fonts-nanum
0 upgraded, 1 newly installed, 0 to remove and 25 not upgraded.
Need to get 9,604 kB of archives.
After this operation, 29.5 MB of additional disk space will be used.
Selecting previously unselected package fonts-nanum.
(Reading database ... 144568 files and directories currently installed.)
Preparing to unpack .../fonts-nanum_20170925-1_all.deb ...
Unpacking fonts-nanum (20170925-1) ...
Setting up fonts-nanum (20170925-1) ...
Processing triggers for fontconfig (2.12.6-0ubuntu2) ...

# Access Google Drive from Colab by mounting it at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive

# Load the cp949-encoded rental CSV, parsing the rental-date column as
# datetimes, and derive year / month / day columns from it.
bike = pd.read_csv(
    '/content/gdrive/My Drive/data/bicycle-hourtime-201905-test.csv',
    parse_dates=["대여일자"],
    encoding='cp949',
).assign(
    년=lambda frame: frame["대여일자"].dt.year,
    월=lambda frame: frame["대여일자"].dt.month,
    일=lambda frame: frame["대여일자"].dt.day,
)

pivot data 생성

# Display the column labels of the loaded frame.
bike.columns

Index(['대여일자', '요일', '요일_New', '주중주말구분', '대여시간', '대여소번호', '대여소명', '지구',
       '지구_New', '거치대수', '대여구분코드', '대여구분코드_new', '성별', '성별_New', '연령대코드',
       '연령대코드_New', '이용건수', '운동량', '탄소량', '이동거리', '사용시간', '년', '월', '일'],
      dtype='object')

# Sum the ride counts for every (district, weekday/weekend flag) pair and
# label the resulting column as a total.
bike_sum_1 = bike.pivot_table(
    index=["지구", "주중주말구분"],
    values=["이용건수"],
    aggfunc=np.sum,
).rename(columns={'이용건수': '이용건수합계'})
bike_sum_1

# Step 1: average the rack count per station within each district.
bike_sum_2 = bike.pivot_table(
    index=["지구", "대여소번호"],
    values=["거치대수"],
    aggfunc=np.average,
)
# Step 2: add the per-station averages up to one rack total per district.
bike_sum_3 = bike_sum_2.pivot_table(
    index=["지구"],
    values=["거치대수"],
    aggfunc=np.sum,
)
bike_sum_3

# Display the column labels of the ride-count pivot.
bike_sum_1.columns

Index(['이용건수합계'], dtype='object')

# Display the rack-count column of the per-district pivot.
bike_sum_3['거치대수']

지구
공원    266
상업    335
주거    474
Name: 거치대수, dtype: int64

# Attach each district's rack total to every (district, weekday/weekend) row.
# The totals are looked up in bike_sum_3 by the '지구' index level instead of
# being retyped by hand: the hand-typed list used 355 for '상업' although
# bike_sum_3 reports 335, which skewed that district's utilisation rate.
bike_sum_1['지구별거치대수'] = (
    bike_sum_1.index.get_level_values('지구').map(bike_sum_3['거치대수']).to_numpy()
)
# Utilisation rate = total ride count divided by the district's rack total.
bike_sum_1['이용률'] = bike_sum_1['이용건수합계'] / bike_sum_1['지구별거치대수']
# Display the rows ordered by the weekday/weekend flag.
bike_sum_1.sort_values(by=['주중주말구분'], axis=0)

주중 이용률에서 상업, 주거 이용률이 공원 이용률보다 1.8배 정도 높다.

[ML] t-SNE 특징 및 예제 (0)	2021.01.31
[python] 공공자전거 데이터 분석(3) - 상관관계 분석 (0)	2020.04.27
[python] 공공자전거 데이터 분석(2) - histogram (0)	2020.04.26

춤추는 개발자

[python] 공공자전거 데이터 분석(4) - pivot data 생성

pivot data 생성

pivot data 생성¶

'Study > Data Analysis' 카테고리의 다른 글

'Study/Data Analysis'의 다른글

티스토리툴바

		이용건수합계
지구	주중주말구분
공원	0	10099
공원	1	4411
상업	0	24335
상업	1	6105
주거	0	32186
주거	1	9245

		이용건수합계	지구별거치대수	이용률
지구	주중주말구분
공원	0	10099	266	37.966165
상업	0	24335	355	68.549296
주거	0	32186	474	67.902954
공원	1	4411	266	16.582707
상업	1	6105	355	17.197183
주거	1	9245	474	19.504219

[python] 공공자전거 데이터 분석(4) - pivot data 생성

pivot data 생성

pivot data 생성¶

'Study > Data Analysis' 카테고리의 다른 글

'Study/Data Analysis'의 다른글

관련글

티스토리툴바