合并具有多个索引和列值的数据帧

Merging dataframes with multi indexes and column value(合并具有多个索引和列值的数据帧)

本文介绍了合并具有多个索引和列值的数据帧的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有两个以多个索引和日期为一列的数据帧:

df1

df1 = pd.DataFrame.from_dict({('group', ''): {0: 'A',
  1: 'A',
  2: 'A',
  3: 'A',
  4: 'A',
  5: 'A',
  6: 'A',
  7: 'A',
  8: 'B',
  9: 'B',
  10: 'B',
  11: 'B',
  12: 'B',
  13: 'B',
  14: 'B',
  15: 'B',
  16: 'C',
  17: 'C',
  18: 'C',
  19: 'C',
  20: 'C',
  21: 'C',
  22: 'C',
  23: 'C',
  24: 'D',
  25: 'D',
  26: 'D',
  27: 'D',
  28: 'D',
  29: 'D',
  30: 'D'},
 ('category', ''): {0: 'Apple',
  1: 'Amazon',
  2: 'Google',
  3: 'Netflix',
  4: 'Facebook',
  5: 'Uber',
  6: 'Tesla',
  7: 'total',
  8: 'Apple',
  9: 'Amazon',
  10: 'Google',
  11: 'Netflix',
  12: 'Facebook',
  13: 'Uber',
  14: 'Tesla',
  15: 'total',
  16: 'Apple',
  17: 'Amazon',
  18: 'Google',
  19: 'Netflix',
  20: 'Facebook',
  21: 'Uber',
  22: 'Tesla',
  23: 'total',
  24: 'Apple',
  25: 'Amazon',
  26: 'Google',
  27: 'Netflix',
  28: 'Uber',
  29: 'Tesla',
  30: 'total'},
 (pd.Timestamp('2021-06-28 00:00:00'), 'total_orders'): {0: 88.0,
  1: 66.0,
  2: 191.0,
  3: 558.0,
  4: 12.0,
  5: 4.0,
  6: 56.0,
  7: 975.0,
  8: 90.0,
  9: 26.0,
  10: 49.0,
  11: 250.0,
  12: 7.0,
  13: 2.0,
  14: 44.0,
  15: 468.0,
  16: 36.0,
  17: 52.0,
  18: 94.0,
  19: 750.0,
  20: 10.0,
  21: 0.0,
  22: 52.0,
  23: 994.0,
  24: 16.0,
  25: 22.0,
  26: 5.0,
  27: 57.0,
  28: 3.0,
  29: 33.0,
  30: 136.0},
 (pd.Timestamp('2021-06-28 00:00:00'), 'total_sales'): {0: 4603.209999999999,
  1: 2485.059999999998,
  2: 4919.39999999998,
  3: 6097.77,
  4: 31.22,
  5: 155.71,
  6: 3484.99,
  7: 17237.35999999996,
  8: 561.54,
  9: 698.75,
  10: 1290.13,
  11: 4292.68000000001,
  12: 947.65,
  13: 329.0,
  14: 2889.65,
  15: 9989.4,
  16: 330.8899999999994,
  17: 2076.26,
  18: 2982.270000000004,
  19: 11978.62000000002,
  20: 683.0,
  21: 0.0,
  22: 3812.16999999999,
  23: 20963.21000000002,
  24: 234.4900000000002,
  25: 896.1,
  26: 231.0,
  27: 893.810000000001,
  28: 129.0,
  29: 1712.329999999998,
  30: 4106.729999999996},
 (pd.Timestamp('2021-07-05 00:00:00'), 'total_orders'): {0: 109.0,
  1: 48.0,
  2: 174.0,
  3: 592.0,
  4: 13.0,
  5: 5.0,
  6: 43.0,
  7: 984.0,
  8: 62.0,
  9: 13.0,
  10: 37.0,
  11: 196.0,
  12: 8.0,
  13: 1.0,
  14: 3.0,
  15: 30.0,
  16: 76.0,
  17: 5.0,
  18: 147.0,
  19: 88.0,
  20: 8.0,
  21: 1.0,
  22: 78.0,
  23: 1248.0,
  24: 1.0,
  25: 18.0,
  26: 23.0,
  27: 83.0,
  28: 0.0,
  29: 29.0,
  30: 154.0},
 (pd.Timestamp('2021-07-05 00:00:00'), 'total_sales'): {0: 3453.02,
  1: 17868.730000000003,
  2: 44707.82999999999,
  3: 61425.99,
  4: 1261.0,
  5: 1914.6000000000001,
  6: 24146.09,
  7: 154777.25999999998,
  8: 6201.489999999999,
  9: 5513.960000000001,
  10: 9645.87,
  11: 25086.785,
  12: 663.0,
  13: 448.61,
  14: 26332.7,
  15: 73892.415,
  16: 556.749999999999,
  17: 1746.859999999997,
  18: 4103.219999999994,
  19: 15571.52000000008,
  20: 86.0,
  21: 69.0,
  22: 5882.759999999995,
  23: 26476.11000000004,
  24: 53.0,
  25: 801.220000000001,
  26: 684.56,
  27: 1232.600000000002,
  28: 0.0,
  29: 15902.1,
  30: 43943.48},
 (pd.Timestamp('2021-07-12 00:00:00'), 'total_orders'): {0: 32.0,
  1: 15.0,
  2: 89.0,
  3: 239.0,
  4: 2.0,
  5: 3.0,
  6: 20.0,
  7: 400.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0,
  16: 21.0,
  17: 14.0,
  18: 58.0,
  19: 281.0,
  20: 3.0,
  21: 3.0,
  22: 33.0,
  23: 413.0,
  24: 7.0,
  25: 6.0,
  26: 4.0,
  27: 13.0,
  28: 0.0,
  29: 18.0,
  30: 48.0},
 (pd.Timestamp('2021-07-12 00:00:00'), 'total_sales'): {0: 2147.7000000000003,
  1: 4767.3,
  2: 2399.300000000003,
  3: 3137.440000000002,
  4: 178.0,
  5: 866.61,
  6: 10639.03,
  7: 73235.38,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0,
  16: 220.94,
  17: 727.5199999999995,
  18: 2500.96999999999,
  19: 4414.00999999998,
  20: 15.0,
  21: 196.71,
  22: 2170.1,
  23: 9745.24999999997,
  24: 126.55,
  25: 290.2,
  26: 146.01,
  27: 233.0,
  28: 0.0,
  29: 973.18,
  30: 1658.940000000002}}).set_index(['group','category'])

df2

df2 = pd.DataFrame.from_dict({'group': {0: 'total_full',
  1: 'total_full',
  2: 'A',
  3: 'A',
  4: 'B',
  5: 'B',
  6: 'C',
  7: 'C',
  8: 'D',
  9: 'D',
  10: 'Apple_total',
  11: 'Apple_total',
  12: 'A',
  13: 'A',
  14: 'B',
  15: 'B',
  16: 'C',
  17: 'C',
  18: 'D',
  19: 'D',
  20: 'Amazon_total',
  21: 'Amazon_total',
  22: 'A',
  23: 'A',
  24: 'B',
  25: 'B',
  26: 'C',
  27: 'C',
  28: 'D',
  29: 'D',
  30: 'Google_total',
  31: 'Google_total',
  32: 'A',
  33: 'A',
  34: 'B',
  35: 'B',
  36: 'C',
  37: 'C',
  38: 'D',
  39: 'D',
  40: 'Facebook_total',
  41: 'Facebook_total',
  42: 'A',
  43: 'A',
  44: 'B',
  45: 'B',
  46: 'C',
  47: 'C',
  48: 'D',
  49: 'D',
  50: 'Netflix_total',
  51: 'Netflix_total',
  52: 'A',
  53: 'A',
  54: 'B',
  55: 'B',
  56: 'C',
  57: 'C',
  58: 'D',
  59: 'D',
  60: 'Tesla_total',
  61: 'Tesla_total',
  62: 'A',
  63: 'A',
  64: 'B',
  65: 'B',
  66: 'C',
  67: 'C',
  68: 'D',
  69: 'D',
  70: 'Uber_total',
  71: 'Uber_total',
  72: 'A',
  73: 'A',
  74: 'B',
  75: 'B',
  76: 'C',
  77: 'C',
  78: 'D',
  79: 'D'},
 'category': {0: 'total_full',
  1: 'total_full',
  2: 'group_total',
  3: 'group_total',
  4: 'group_total',
  5: 'group_total',
  6: 'group_total',
  7: 'group_total',
  8: 'group_total',
  9: 'group_total',
  10: 'Apple_total',
  11: 'Apple_total',
  12: 'Apple',
  13: 'Apple',
  14: 'Apple',
  15: 'Apple',
  16: 'Apple',
  17: 'Apple',
  18: 'Apple',
  19: 'Apple',
  20: 'Amazon_total',
  21: 'Amazon_total',
  22: 'Amazon',
  23: 'Amazon',
  24: 'Amazon',
  25: 'Amazon',
  26: 'Amazon',
  27: 'Amazon',
  28: 'Amazon',
  29: 'Amazon',
  30: 'Google_total',
  31: 'Google_total',
  32: 'Google',
  33: 'Google',
  34: 'Google',
  35: 'Google',
  36: 'Google',
  37: 'Google',
  38: 'Google',
  39: 'Google',
  40: 'Facebook_total',
  41: 'Facebook_total',
  42: 'Facebook',
  43: 'Facebook',
  44: 'Facebook',
  45: 'Facebook',
  46: 'Facebook',
  47: 'Facebook',
  48: 'Facebook',
  49: 'Facebook',
  50: 'Netflix_total',
  51: 'Netflix_total',
  52: 'Netflix',
  53: 'Netflix',
  54: 'Netflix',
  55: 'Netflix',
  56: 'Netflix',
  57: 'Netflix',
  58: 'Netflix',
  59: 'Netflix',
  60: 'Tesla_total',
  61: 'Tesla_total',
  62: 'Tesla',
  63: 'Tesla',
  64: 'Tesla',
  65: 'Tesla',
  66: 'Tesla',
  67: 'Tesla',
  68: 'Tesla',
  69: 'Tesla',
  70: 'Uber_total',
  71: 'Uber_total',
  72: 'Uber',
  73: 'Uber',
  74: 'Uber',
  75: 'Uber',
  76: 'Uber',
  77: 'Uber',
  78: 'Uber',
  79: 'Uber'},
 'type': {0: 'Sales_1',
  1: 'Sales_2',
  2: 'Sales_1',
  3: 'Sales_2',
  4: 'Sales_1',
  5: 'Sales_2',
  6: 'Sales_1',
  7: 'Sales_2',
  8: 'Sales_1',
  9: 'Sales_2',
  10: 'Sales_1',
  11: 'Sales_2',
  12: 'Sales_1',
  13: 'Sales_2',
  14: 'Sales_1',
  15: 'Sales_2',
  16: 'Sales_1',
  17: 'Sales_2',
  18: 'Sales_1',
  19: 'Sales_2',
  20: 'Sales_1',
  21: 'Sales_2',
  22: 'Sales_1',
  23: 'Sales_2',
  24: 'Sales_1',
  25: 'Sales_2',
  26: 'Sales_1',
  27: 'Sales_2',
  28: 'Sales_1',
  29: 'Sales_2',
  30: 'Sales_1',
  31: 'Sales_2',
  32: 'Sales_1',
  33: 'Sales_2',
  34: 'Sales_1',
  35: 'Sales_2',
  36: 'Sales_1',
  37: 'Sales_2',
  38: 'Sales_1',
  39: 'Sales_2',
  40: 'Sales_1',
  41: 'Sales_2',
  42: 'Sales_1',
  43: 'Sales_2',
  44: 'Sales_1',
  45: 'Sales_2',
  46: 'Sales_1',
  47: 'Sales_2',
  48: 'Sales_1',
  49: 'Sales_2',
  50: 'Sales_1',
  51: 'Sales_2',
  52: 'Sales_1',
  53: 'Sales_2',
  54: 'Sales_1',
  55: 'Sales_2',
  56: 'Sales_1',
  57: 'Sales_2',
  58: 'Sales_1',
  59: 'Sales_2',
  60: 'Sales_1',
  61: 'Sales_2',
  62: 'Sales_1',
  63: 'Sales_2',
  64: 'Sales_1',
  65: 'Sales_2',
  66: 'Sales_1',
  67: 'Sales_2',
  68: 'Sales_1',
  69: 'Sales_2',
  70: 'Sales_1',
  71: 'Sales_2',
  72: 'Sales_1',
  73: 'Sales_2',
  74: 'Sales_1',
  75: 'Sales_2',
  76: 'Sales_1',
  77: 'Sales_2',
  78: 'Sales_1',
  79: 'Sales_2'},
 '2021-06-28': {0: 67.5277641202152,
  1: 82.7854700135998,
  2: 21.50082266792856,
  3: 22.03644997199996,
  4: 64.460440147,
  5: 10.1060499896,
  6: 65.1530371974946,
  7: 50.6429700519999,
  8: 56.413464107792045,
  9: 0,
  10: 17.48074540313092,
  11: 26.8376199976,
  12: 52.172,
  13: 61.16600000040001,
  14: 20.9447844,
  15: 40.69122000000001,
  16: 83.55718929717925,
  17: 14.98039999719995,
  18: 20.806771705951697,
  19: np.nan,
  20: 18.3766353690825,
  21: 12.82565001479992,
  22: 52.425508769690694,
  23: 25.661999978399994,
  24: 17.88071596,
  25: 24.384659998799997,
  26: 91.10086982794643,
  27: 12.77899003759993,
  28: 16.969540811445366,
  29: np.nan,
  30: 18.8795397517309,
  31: 26.73017999840005,
  32: 53.52039700062155,
  33: 58.81199999639999,
  34: 12.1243325,
  35: 24.0544100028,
  36: 55.94068246571674,
  37: 133.86376999920006,
  38: 7.294127785392621,
  39: np.nan,
  40: 6.07807089184563,
  41: 7.27483001599998,
  42: 2.300470581874837,
  43: 30.71300000639998,
  44: 5.810764652,
  45: 12.333119997600003,
  46: 25.475930745418292,
  47: 64.228710012,
  48: 9.490904912552498,
  49: np.nan,
  50: 8.184780211399392,
  51: 24.59321999400001,
  52: 6.807138946302334,
  53: 12.0879999972,
  54: 0.869207661,
  55: 0.324,
  56: 0.5084336040970575,
  57: 12.181219996800007,
  58: 0,
  59: np.nan,
  60: 9.293956915067886,
  61: 11.171379993599999,
  62: 6.384936971649232,
  63: 3.657999996,
  64: 0.913782413,
  65: 1.9992000012000002,
  66: 1.5322078073061867,
  67: 5.514179996399999,
  68: 0.4630297231124678,
  69: np.nan,
  70: 36.23403557795798,
  71: 53.35258999919999,
  72: 21.890370397789923,
  73: 9.937449997200002,
  74: 5.916852561,
  75: 6.319439989199998,
  76: 7.03772344983066,
  77: 37.095700012799995,
  78: 1.3890891693374032,
  79: np.nan},
 '2021-07-05': {0: 65.4690491915759,
  1: 98.5235100112003,
  2: 21.4573181155924,
  3: 241.06741999679997,
  4: 67.481716829,
  5: 11.60325000040002,
  6: 27.5807099999998,
  7: 65.8528400140003,
  8: 58.949304246983736,
  9: 0.0,
  10: 185.65887577993723,
  11: 318.9965699964001,
  12: 54.517,
  13: 66.55265999039996,
  14: 21.92632044,
  15: 43.67116000320002,
  16: 87.47349898707688,
  17: 208.7727500028001,
  18: 21.742056352860352,
  19: np.nan,
  20: 16.6038963173654,
  21: 25.28952001920013,
  22: 54.7820864335212,
  23: 36.75802000560001,
  24: 18.71872129,
  25: 30.1634600016,
  26: 95.37075040035738,
  27: 138.3680400120001,
  28: 17.73233819348684,
  29: np.nan,
  30: 14.80302342121337,
  31: 251.83851001200003,
  32: 55.926190956481534,
  33: 72.4443400032,
  34: 12.69221484,
  35: 26.032340003999998,
  36: 58.56261169338368,
  37: 153.36183000480003,
  38: 7.622005931348156,
  39: np.nan,
  40: 72.24367956241771,
  41: 14.83083001279999,
  42: 29.5726042895728,
  43: 38.723000005199985,
  44: 6.083562133,
  45: 12.845630001599998,
  46: 26.66998281055652,
  47: 63.26220000600001,
  48: 9.917530329288393,
  49: np.nan,
  50: 8.555606693927,
  51: 23.802009994800002,
  52: 7.113126469779095,
  53: 7.206999998399999,
  54: 0.910216433,
  55: 1.4089999991999997,
  56: 0.5322637911479053,
  57: 15.186009997200001,
  58: 0.0,
  59: np.nan,
  60: 9.716385738295367,
  61: 14.7327399948,
  62: 6.671946105284065,
  63: 5.691999996,
  64: 0.956574175,
  65: 1.0203399996,
  66: 1.6040220980113027,
  67: 8.020399999199999,
  68: 0.4838433599999999,
  69: np.nan,
  70: 37.88758167841983,
  71: 59.03332998119994,
  72: 22.874363860953647,
  73: 13.690399997999998,
  74: 6.194107518,
  75: 6.4613199911999954,
  76: 7.367580219466185,
  77: 38.881609991999944,
  78: 1.4515300799999995,
  79: np.nan},
 '2021-07-12': {0: 607.2971827405001,
  1: 88.9671100664001,
  2: 21.26749278974862,
  3: 17.1524199804,
  4: 64.471138092,
  5: 89.84481002279999,
  6: 26.2044999999998,
  7: 51.9698800632001,
  8: 5.354051858751745,
  9: 0.0,
  10: 177.42361595891452,
  11: 287.5395700032,
  12: 52.117,
  13: 47.388199995600004,
  14: 20.94835038,
  15: 41.4250800048,
  16: 83.57340667555117,
  17: 198.72629000280003,
  18: 20.784858903363354,
  19: np.nan,
  20: 178.323907459086,
  21: 185.83897002839998,
  22: 52.37029646474982,
  23: 27.87144997800001,
  24: 17.88339044,
  25: 23.645340010799984,
  26: 91.11855133792106,
  27: 134.3221800396,
  28: 16.95166921641509,
  29: np.nan,
  30: 128.82813286243115,
  31: 192.6867300156,
  32: 53.46403160619618,
  33: 41.412320006399995,
  34: 12.1261155,
  35: 11.840830002000002,
  36: 55.95153983444301,
  37: 139.43358000720002,
  38: 7.286445921791947,
  39: np.nan,
  40: 69.04410667683521,
  41: 93.877410018,
  42: 28.270665735943805,
  43: 27.512680004399986,
  44: 5.811656147,
  45: 5.2319800032,
  46: 25.480875296710053,
  47: 61.132750010400024,
  48: 9.480909497181356,
  49: np.nan,
  50: 8.178601399067174,
  51: 17.6743199976,
  52: 6.7999699585309585,
  53: 6.131999998799999,
  54: 0.870099156,
  55: 0.6185600004,
  56: 0.5085322845362154,
  57: 10.923759998400003,
  58: 0.0,
  59: np.nan,
  60: 9.287042311133577,
  61: 19.966500000000007,
  62: 6.378212628950804,
  63: 6.524999997600001,
  64: 0.913782413,
  65: 1.9303400016,
  66: 1.5325051891827732,
  67: 11.511160000800006,
  68: 0.4625420799999998,
  69: np.nan,
  70: 36.21177607303267,
  71: 51.3836100036,
  72: 21.86731639537707,
  73: 10.310769999600003,
  74: 5.917744056,
  75: 5.152679999999999,
  76: 7.039089381655591,
  77: 35.920160003999996,
  78: 1.3876262399999995,
  79: np.nan}}).set_index(['group','category','type'])

我正在尝试合并df2ondf1bygroup, category, date (date is a column),以便我的输出如下所示:

我在所需的输出示例中省略了df2df2中的值,但应该用df2中的groupcategory值填充这些行。

                    2021-06-28                                                               2021-07-05                                                      2021-07-12
                    total_orders    total_sales         sales_1        sales_2                  total_orders    total_sales         sales_1        sales_2            total_orders    total_sales         sales_1        sales_2
group    category
A        Apple        88.000          4,603.210
         Amazon       66.000          2,485.060
         Google      191.000          4,919.400
         Netflix     558.000          6,097.770
         Facebook     12.000             31.220
         Uber          4.000            155.710
         Tesla        56.000          3,484.990
         total       975.000         17,237.360
B        Apple        90.000            561.540
         Amazon       26.000            698.750
         Google       49.000          1,290.130
         Netflix     250.000          4,292.680
         Facebook      7.000            947.650
         Uber          2.000            329.000
         Tesla        44.000          2,889.650
         total       468.000          9,989.400
C        Apple        36.000            330.890
         Amazon       52.000          2,076.260
         Google       94.000          2,982.270
         Netflix     750.000         11,978.620
         Facebook     10.000            683.000
         Uber          0.000              0.000
         Tesla        52.000          3,812.170
         total       994.000         20,963.210
D        Apple        16.000            234.490
         Amazon       22.000            896.100
         Google        5.000            231.000
         Netflix      57.000            893.810
         Uber          3.000            129.000
         Tesla        33.000          1,712.330
         total       136.000          4,106.730
以便sales_1 & sales_2group & category上合并,并在同一date列上。可以忽略df2中的total_x,因为它可以从字段中计算出来。

合并中不使用total_values,只使用它后面的

我尝试的内容:

df1.reset_index().merge(df2.reset_index(), left_on=['group', 'category'], right_on=['group', 'category'])

这会抛出警告:

UserWarning:合并不同级别可能会产生意外的结果(左侧2个级别,右侧1个级别)

而且不是我期望的合并方式。如何才能获得所需的输出?

使用

df = df1.merge(df2.unstack(), left_index=True, right_index=True)

生产:

是否只是重新排序列,因为我希望它有一个唯一的日期和4列?或者可能是某个日期已00:00:00

推荐答案

先在df2的列中创建DatetimeIndex,然后unstack并按MultiIndexES:

合并
f = lambda x: pd.to_datetime(x)
df = (df1.merge(df2.rename(columns=f).unstack(), left_index=True, right_index=True)
        .sort_index(axis=1))

print (df.head())

               2021-06-28                                  2021-07-05  
                  Sales_1 Sales_2 total_orders total_sales    Sales_1   
group category                                                          
A     Apple     52.172000  61.166         88.0     4603.21  54.517000   
      Amazon    52.425509  25.662         66.0     2485.06  54.782086   
      Google    53.520397  58.812        191.0     4919.40  55.926191   
      Netflix    6.807139  12.088        558.0     6097.77   7.113126   
      Facebook   2.300471  30.713         12.0       31.22  29.572604   

                                                  2021-07-12            
                 Sales_2 total_orders total_sales    Sales_1   Sales_2   
group category                                                           
A     Apple     66.55266        109.0     3453.02  52.117000  47.38820   
      Amazon    36.75802         48.0    17868.73  52.370296  27.87145   
      Google    72.44434        174.0    44707.83  53.464032  41.41232   
      Netflix    7.20700        592.0    61425.99   6.799970   6.13200   
      Facebook  38.72300         13.0     1261.00  28.270666  27.51268   

                                         
               total_orders total_sales  
group category                           
A     Apple            32.0     2147.70  
      Amazon           15.0     4767.30  
      Google           89.0     2399.30  
      Netflix         239.0     3137.44  
      Facebook          2.0      178.00  

这篇关于合并具有多个索引和列值的数据帧的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持编程学习网!

本文标题为:合并具有多个索引和列值的数据帧