# 전처리 | Coding/Python | 2021. 1. 28. 21:24
import pandas as pd
# Load the tab-separated Chipotle order data; the path is kept in one place
# (file_path) so the read below cannot drift from it.
file_path = 'chipotle.tsv'
chipo = pd.read_csv(file_path, sep='\t')
chipo.head()
chipo.shape
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit an extra "None" line.
chipo.info()
chipo.describe()
chipo.columns
chipo.index
# Cast order_id to str. describe() summarizes only numeric columns by default,
# so the column is left out of the summary below.
chipo['order_id'] = chipo['order_id'].astype(str)
chipo.describe()
# Top 10 item names by number of rows; value_counts() sorts in descending
# order, so the first ten entries are the most frequent ones.
item_count = chipo['item_name'].value_counts()[:10]
item_count
# Series.iteritems() was removed in pandas 2.0; items() yields the same
# (label, value) pairs. Ranks are numbered from 1.
for index, (val, cnt) in enumerate(item_count.items(), 1):
    print("Top", index, ":", val, cnt)
# Number of order rows recorded for each item.
order_count = chipo.groupby('item_name')['order_id'].count()
order_count[:10]
# Total quantity ordered for each item. This is kept under its own name
# because the plotting code below reads item_quantity.
item_quantity = chipo.groupby('item_name')['quantity'].sum()
item_quantity[:10]
import numpy as np
import matplotlib.pyplot as plt
# item_quantity.index.tolist()  -> item names (the group labels)
# item_quantity.values.tolist() -> summed quantity for each of those items
item_name_list = item_quantity.index.tolist()
# Items are placed at integer positions 0..n-1 along the x axis.
x_pos = np.arange(len(item_name_list))
order_cnt = item_quantity.values.tolist()
plt.bar(x_pos, order_cnt, align='center')
plt.ylabel('ordered_item_count')
plt.title('Distribution of all ordered item')
plt.show()
# Preprocessing: convert the price text to a number by dropping the first
# character (the "$" sign) and parsing the rest as float.
# NOTE: this is not re-runnable; once the column holds floats, x[1:] fails.
chipo['item_price'] = chipo['item_price'].apply(lambda x: float(x[1:]))
# Average amount paid per order: total price of each order, then the mean.
chipo.groupby('order_id')['item_price'].sum().mean()
# Order ids whose total payment is at least 10 dollars.
# numeric_only=True keeps the per-order sums to the numeric columns; without
# it, current pandas also "sums" the text columns by concatenating strings.
chipo_orderid_group = chipo.groupby('order_id').sum(numeric_only=True)
results = chipo_orderid_group[chipo_orderid_group.item_price >= 10]
print(results[:10])
print(results.index.values)
# Rows with quantity 1 only, then the column-wise minimum per item name,
# sorted by item_price to list the ten highest values.
chipo_one_item = chipo[chipo.quantity == 1]
price_per_item = chipo_one_item.groupby('item_name').min()
price_per_item.sort_values(by='item_price', ascending=False)[:10]
import pandas as pd
import numpy as np
import matplotlib.pyplot
file_path = 'drinks.csv'
drinks = pd.read_csv(file_path)
# Missing-data preprocessing: count missing values per column and show dtypes.
print(drinks.isnull().sum())
print(drinks.dtypes)
# Fill missing continent values with the placeholder 'OT'.
drinks['continent'] = drinks['continent'].fillna('OT')
print(drinks.isnull().sum())
# NOTE(review): len() of the boolean mask is the total row count, not the
# number of missing values; use .sum() if the missing count was intended.
len(drinks['continent'].isnull())
# Pie-chart visualization: number of rows per continent.
drinks['continent'].value_counts()
# 'Coding > Python' 카테고리의 다른 글
# - for, 문자열 (0) 2021.02.05
# - machine learning (0) 2021.02.01
# - Graph (0) 2021.01.22
# - jupyter-data visualization (0) 2021.01.21
# - Jupyter-data2 (0) 2021.01.20