ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • 전처리
    Coding/Python 2021. 1. 28. 21:24

    import pandas as pd

     

    # Load the Chipotle order data; the file is tab-separated.
    file_path = 'chipotle.tsv'
    chipo = pd.read_csv(file_path, sep='\t')

    # First look at the data: sample rows, dimensions, dtypes, summary stats.
    chipo.head()
    chipo.shape
    print(chipo.info())
    chipo.describe()
    chipo.columns
    chipo.index

    # order_id is an identifier, not a quantity: cast it to str so that
    # describe() no longer reports numeric statistics for it.
    chipo['order_id'] = chipo['order_id'].astype(str)
    chipo.describe()

    # The 10 most frequently ordered items (value_counts sorts descending).
    item_count = chipo['item_name'].value_counts()[:10]
    item_count

    # Print the ranking, numbered from 1. Series.items() replaces
    # iteritems(), which was removed in pandas 2.0.
    for index, (val, cnt) in enumerate(item_count.items(), 1):
        print("Top", index, ":", val, cnt)

    # Number of order rows per item.
    order_count = chipo.groupby('item_name')['order_id'].count()
    order_count[:10]

    # Total quantity ordered per item (rebinds order_count).
    order_count = chipo.groupby('item_name')['quantity'].sum()
    order_count[:10]

     

    import numpy as np

    import matplotlib.pyplot as plt

     

    # Total quantity ordered per item. Defined here because no earlier
    # statement in this script binds the name item_quantity.
    item_quantity = chipo.groupby('item_name')['quantity'].sum()

    # One bar per item: x positions are 0..n-1, heights are the quantities.
    item_name_list = item_quantity.index.tolist()
    x_pos = np.arange(len(item_name_list))
    order_cnt = item_quantity.values.tolist()

    plt.bar(x_pos, order_cnt, align='center')
    plt.ylabel('ordered_item_count')
    plt.title('Distribution of all ordered item')
    plt.show()

     

    * item_quantity.index.tolist()

    * item_quantity.values.tolist()

     

    # Preprocessing: turn the price string into a number by dropping the
    # first character (the '$' sign) and converting the rest to float.
    chipo['item_price'] = chipo['item_price'].apply(lambda x: float(x[1:]))

    # Average amount paid per order: sum the prices within each order,
    # then take the mean of those per-order totals.
    chipo.groupby('order_id')['item_price'].sum().mean()

    # Order IDs whose total payment is at least 10 dollars.
    chipo_orderid_group = chipo.groupby('order_id').sum()
    results = chipo_orderid_group[chipo_orderid_group.item_price >= 10]
    print(results[:10])
    print(results.index.values)

    # Price of each item: restrict to rows with quantity 1, take the
    # per-item minimum, and show the 10 highest item_price values.
    chipo_one_item = chipo[chipo.quantity == 1]
    price_per_item = chipo_one_item.groupby('item_name').min()
    price_per_item.sort_values(by='item_price', ascending=False)[:10]

     

    import pandas as pd

    import numpy as np

    import matplotlib.pyplot

     

    # Load the drinks data set.
    file_path = 'drinks.csv'
    drinks = pd.read_csv(file_path)

    # Missing-data preprocessing: inspect null counts and dtypes first.
    print(drinks.isnull().sum())
    print(drinks.dtypes)

    # Fill missing continent values with the placeholder 'OT'.
    drinks['continent'] = drinks['continent'].fillna('OT')

    # Re-check the null counts after filling.
    print(drinks.isnull().sum())

    # NOTE: isnull() returns a boolean mask with one entry per row, so this
    # is the number of rows, not the number of missing values.
    len(drinks['continent'].isnull())

    # Rows per continent (the data intended for a pie chart).
    drinks['continent'].value_counts()

     

    'Coding > Python' 카테고리의 다른 글

    for, 문자열  (0) 2021.02.05
    machine learning  (0) 2021.02.01
    Graph  (0) 2021.01.22
    jupyter-data visualization  (0) 2021.01.21
    Jupyter-data2  (0) 2021.01.20

    댓글

Designed by Tistory.