Malloy Documentation
search

Malloy separates queries into a source and a view on that source. Views can be defined in a source or used directly in queries. Views that are named as part of a source extension can be reused, nested, and refined.

A view consists of one or more stages separated by ->s. Most views only have one stage, but multi-stage views can be used for more complex analysis.

Reduction vs Projection

Each stage of a view performs a transformation of one of two kinds:

  • Reduction: reduce the grain of the data

  • Projection: select fields without reducing

Reductions use group_by: and/or aggregate:, whereas projections use select:.

Reduction

The following is an example of a reduction:

document
run: flights -> {
  group_by: carrier        
  aggregate: flight_count is count()
}
QUERY RESULTS
carrierflight_​count
WN88,751
US37,683
AA34,577
NW33,580
UA32,757
[
  {
    "carrier": "WN",
    "flight_count": 88751
  },
  {
    "carrier": "US",
    "flight_count": 37683
  },
  {
    "carrier": "AA",
    "flight_count": 34577
  },
  {
    "carrier": "NW",
    "flight_count": 33580
  },
  {
    "carrier": "UA",
    "flight_count": 32757
  }
]
SELECT 
   flights."carrier" as "carrier",
   COUNT( 1) as "flight_count"
FROM '../data/flights.parquet' as flights
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Projection

The following is an example of a projection:

document
run: flights -> {
  select: *
  limit: 20
}
QUERY RESULTS
arr_​delayarr_​timecancelledcarrierdep_​delaydep_​timedestination_​codedistancedivertedflight_​numflight_​timeid2origin_​codetail_​numtaxi_​intaxi_​out
-62004-11-18 23:09:00NUS-32004-11-18 22:32:00ABE55N16921530,272,525PHLN806MD418
02004-10-12 21:28:00NUS62004-10-12 20:46:00ABE55N16501829,742,442PHLN806MD420
22004-11-24 11:14:00NUS02004-11-24 10:20:00ABE55N16161930,270,885PHLN816MA530
-192004-08-31 21:06:00NUS02004-08-31 20:30:00ABE55N16501728,344,746PHLN806MD415
-192004-07-27 10:59:00NUS-42004-07-27 10:21:00ABE55N16431727,898,410PHLN806MD417
[
  {
    "arr_delay": -6,
    "arr_time": "2004-11-18T23:09:00.000Z",
    "cancelled": "N",
    "carrier": "US",
    "dep_delay": -3,
    "dep_time": "2004-11-18T22:32:00.000Z",
    "destination_code": "ABE",
    "distance": 55,
    "diverted": "N",
    "flight_num": "1692",
    "flight_time": 15,
    "id2": 30272525,
    "origin_code": "PHL",
    "tail_num": "N806MD",
    "taxi_in": 4,
    "taxi_out": 18
  },
  {
    "arr_delay": 0,
    "arr_time": "2004-10-12T21:28:00.000Z",
    "cancelled": "N",
    "carrier": "US",
    "dep_delay": 6,
    "dep_time": "2004-10-12T20:46:00.000Z",
    "destination_code": "ABE",
    "distance": 55,
    "diverted": "N",
    "flight_num": "1650",
    "flight_time": 18,
    "id2": 29742442,
    "origin_code": "PHL",
    "tail_num": "N806MD",
    "taxi_in": 4,
    "taxi_out": 20
  },
  {
    "arr_delay": 2,
    "arr_time": "2004-11-24T11:14:00.000Z",
    "cancelled": "N",
    "carrier": "US",
    "dep_delay": 0,
    "dep_time": "2004-11-24T10:20:00.000Z",
    "destination_code": "ABE",
    "distance": 55,
    "diverted": "N",
    "flight_num": "1616",
    "flight_time": 19,
    "id2": 30270885,
    "origin_code": "PHL",
    "tail_num": "N816MA",
    "taxi_in": 5,
    "taxi_out": 30
  },
  {
    "arr_delay": -19,
    "arr_time": "2004-08-31T21:06:00.000Z",
    "cancelled": "N",
    "carrier": "US",
    "dep_delay": 0,
    "dep_time": "2004-08-31T20:30:00.000Z",
    "destination_code": "ABE",
    "distance": 55,
    "diverted": "N",
    "flight_num": "1650",
    "flight_time": 17,
    "id2": 28344746,
    "origin_code": "PHL",
    "tail_num": "N806MD",
    "taxi_in": 4,
    "taxi_out": 15
  },
  {
    "arr_delay": -19,
    "arr_time": "2004-07-27T10:59:00.000Z",
    "cancelled": "N",
    "carrier": "US",
    "dep_delay": -4,
    "dep_time": "2004-07-27T10:21:00.000Z",
    "destination_code": "ABE",
    "distance": 55,
    "diverted": "N",
    "flight_num": "1643",
    "flight_time": 17,
    "id2": 27898410,
    "origin_code": "PHL",
    "tail_num": "N806MD",
    "taxi_in": 4,
    "taxi_out": 17
  }
]
SELECT 
   flights."arr_delay" as "arr_delay",
   flights."arr_time" as "arr_time",
   flights."cancelled" as "cancelled",
   flights."carrier" as "carrier",
   flights."dep_delay" as "dep_delay",
   flights."dep_time" as "dep_time",
   flights."destination" as "destination_code",
   flights."distance" as "distance",
   flights."diverted" as "diverted",
   flights."flight_num" as "flight_num",
   flights."flight_time" as "flight_time",
   flights."id2" as "id2",
   flights."origin" as "origin_code",
   flights."tail_num" as "tail_num",
   flights."taxi_in" as "taxi_in",
   flights."taxi_out" as "taxi_out"
FROM '../data/flights.parquet' as flights
LIMIT 20

Note that the operations in a stage are for the most part not order-sensitive like SQL; they can be arranged in any order.

View Operations

Views can contain a number of different kinds of operations which affect the behavior of the transformation. The following sections describe these various operations.

Fields

In a stage, fields (dimensions, measures, views, and calculations) may be specified either by referencing an existing name or defining them inline.

document
run: flights -> {
  group_by: carrier
  aggregate: flight_count is count()
}
QUERY RESULTS
carrierflight_​count
WN88,751
US37,683
AA34,577
NW33,580
UA32,757
[
  {
    "carrier": "WN",
    "flight_count": 88751
  },
  {
    "carrier": "US",
    "flight_count": 37683
  },
  {
    "carrier": "AA",
    "flight_count": 34577
  },
  {
    "carrier": "NW",
    "flight_count": 33580
  },
  {
    "carrier": "UA",
    "flight_count": 32757
  }
]
SELECT 
   flights."carrier" as "carrier",
   COUNT( 1) as "flight_count"
FROM '../data/flights.parquet' as flights
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Dimensions are included with group_by (or select in a projection), measures are included with aggregate:, and views are nested with nest:.

When referencing existing fields in a select: clause, wildcard expressions like *, or some_join.* may be used.

See the Fields section for more information about the different kinds of fields and how they can be defined.

Filters

Filters can be included in a view with where:, which is equivalent to SQL's WHERE clause.

document
run: flights -> {
  where: distance > 1000
  group_by: distance
  aggregate: flight_count
}
QUERY RESULTS
distanceflight_​count
1,0501,156
2,2881,153
1,0091,093
1,2351,093
1,0201,084
[
  {
    "distance": 1050,
    "flight_count": 1156
  },
  {
    "distance": 2288,
    "flight_count": 1153
  },
  {
    "distance": 1009,
    "flight_count": 1093
  },
  {
    "distance": 1235,
    "flight_count": 1093
  },
  {
    "distance": 1020,
    "flight_count": 1084
  }
]
SELECT 
   flights."distance" as "distance",
   COUNT( 1) as "flight_count"
FROM '../data/flights.parquet' as flights
WHERE flights."distance">1000
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Filters may be also be applied to a source, or a measure.

See the Filters section for more information.

See Post-Aggregation Filtering below for information about having:.

Ordering and Limiting

Views may also include ordering and limiting specifications.

document
run: flights -> {
  limit: 4
  group_by: carrier
  aggregate: flight_count
}
QUERY RESULTS
carrierflight_​count
WN88,751
US37,683
AA34,577
NW33,580
[
  {
    "carrier": "WN",
    "flight_count": 88751
  },
  {
    "carrier": "US",
    "flight_count": 37683
  },
  {
    "carrier": "AA",
    "flight_count": 34577
  },
  {
    "carrier": "NW",
    "flight_count": 33580
  }
]
SELECT 
   flights."carrier" as "carrier",
   COUNT( 1) as "flight_count"
FROM '../data/flights.parquet' as flights
GROUP BY 1
ORDER BY 2 desc NULLS LAST
LIMIT 4

For detailed information on ordering and limiting, see the Ordering and Limiting section.

Post-Aggregation Filtering

Views may filter entire groupings based on aggregate values using the having: clause, which corresponds to SQL's HAVING clause.

document
run: flights -> {
  group_by: carrier
  aggregate: flight_count
  having: flight_count > 35000
}
QUERY RESULTS
carrierflight_​count
WN88,751
US37,683
[
  {
    "carrier": "WN",
    "flight_count": 88751
  },
  {
    "carrier": "US",
    "flight_count": 37683
  }
]
SELECT 
   flights."carrier" as "carrier",
   COUNT( 1) as "flight_count"
FROM '../data/flights.parquet' as flights
GROUP BY 1
HAVING (COUNT( 1))>35000
ORDER BY 2 desc NULLS LAST

Calculations (Window Functions)

Calculations based on other groupings may be performed with the calculate: clause and analytic functions. See the Calculations section for details.

document
run: flights -> {
  group_by: carrier
  aggregate: flight_count
  calculate: flight_count_rank is rank()
}
QUERY RESULTS
carrierflight_​countflight_​count_​rank
WN88,7511
US37,6832
AA34,5773
NW33,5804
UA32,7575
[
  {
    "carrier": "WN",
    "flight_count": 88751,
    "flight_count_rank": 1
  },
  {
    "carrier": "US",
    "flight_count": 37683,
    "flight_count_rank": 2
  },
  {
    "carrier": "AA",
    "flight_count": 34577,
    "flight_count_rank": 3
  },
  {
    "carrier": "NW",
    "flight_count": 33580,
    "flight_count_rank": 4
  },
  {
    "carrier": "UA",
    "flight_count": 32757,
    "flight_count_rank": 5
  }
]
SELECT 
   flights."carrier" as "carrier",
   COUNT( 1) as "flight_count",
   RANK() OVER(  ORDER BY  COUNT( 1) desc NULLS LAST ) as "flight_count_rank"
FROM '../data/flights.parquet' as flights
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Refinements, Query partials and Shorthand

Query blocks write be written in parts, those parts can be either views in the source and combined with the refinement operator +

The query below

run: flights -> {
  group_by: carrier
  aggregate: flight_count
  limit: 10
}

can be written as below. Each of the parts of a query can be separated into a 'partial query'.

run: flights -> 
  {group_by: carrier}
  + {aggregate: flight_count}
  + {limit: 10}

The query can also be written as:

run: flights -> 
  carrier
  + flight_count
  + {limit: 10}

In the query above, the refienment operator + combines the parameters of a query so the query can be built from parts. Measure, dimension and views declared in the source can also be used. Referencing dimension carrier expands to a partial of {group_by: x}. Referencing measure flight_count expands to a partial query of {aggregate: flight_count}

Source Extensions

When writing a query, if additional source extensions are needed, they can be extend-ed into the source in the query expression, as in

run: some_source extend { extensions } -> { view operations }

Such extensions can also be included in a view using the extend: block.

document
run: flights -> {
  extend: {
    join_one: origin_airport is airports on origin_airport.code = origin_code
    dimension: origin_state is origin_airport.state
  }

  group_by: origin_state
  aggregate: flight_count
  limit: 5
}
QUERY RESULTS
origin_​stateflight_​count
CA40,670
TX40,085
FL24,242
IL20,850
GA20,014
[
  {
    "origin_state": "CA",
    "flight_count": 40670
  },
  {
    "origin_state": "TX",
    "flight_count": 40085
  },
  {
    "origin_state": "FL",
    "flight_count": 24242
  },
  {
    "origin_state": "IL",
    "flight_count": 20850
  },
  {
    "origin_state": "GA",
    "flight_count": 20014
  }
]
SELECT 
   origin_airport_0."state" as "origin_state",
   COUNT( 1) as "flight_count"
FROM '../data/flights.parquet' as flights
 LEFT JOIN '../data/airports.parquet' AS origin_airport_0
  ON origin_airport_0."code"=flights."origin"
GROUP BY 1
ORDER BY 2 desc NULLS LAST
LIMIT 5

When the view is defined inside a source, source extensions can only be added in this way. The legal extensions in a view are:

  • Defining fields with dimension: and measure:

  • Declaring joins with join_one:, join_many:, and join_cross:

Reusing, Nesting, and Refining Views

When views are defined in a source, they can be reused in a number of ways.

Reusing Views

They can serve as the starting point for multiple different queries:

run: flights -> by_carrier -> { select: nickname; limit: 1 }
run: flights -> by_carrier -> { index: * }

Nesting Views

Views can also be nested in other views:

document
run: flights -> {
  group_by: origin.state
  nest: by_carrier
}
QUERY RESULTS
stateby_​carrier
AK
nicknameflight_​countdestination_​count
Alaska5025
Northwest1643
Delta1405
United554
American11
AL
nicknameflight_​countdestination_​count
Atlantic Southeast9101
Southwest83612
Delta5874
Continental Express2323
Comair1035
American451
USAir281
Continental151
AR
nicknameflight_​countdestination_​count
American Eagle8533
Southwest3647
Continental Express1363
Delta662
American652
Atlantic Southeast441
Comair402
Northwest131
AZ
nicknameflight_​countdestination_​count
Southwest6,91242
America West3,81941
United7037
American6616
Northwest5435
Alaska3813
USAir3163
Delta905
ATA422
Continental373
Continental Express271
Jetblue71
CA
nicknameflight_​countdestination_​count
Southwest19,75532
United6,90137
American4,68731
Alaska2,6189
Northwest2,20910
America West2,0474
Jetblue9109
Delta74913
USAir5424
Continental1052
ATA784
American Eagle422
Continental Express262
Comair11
[
  {
    "state": "AK",
    "by_carrier": [
      {
        "nickname": "Alaska",
        "flight_count": 502,
        "destination_count": 5
      },
      {
        "nickname": "Northwest",
        "flight_count": 164,
        "destination_count": 3
      },
      {
        "nickname": "Delta",
        "flight_count": 140,
        "destination_count": 5
      },
      {
        "nickname": "United",
        "flight_count": 55,
        "destination_count": 4
      },
      {
        "nickname": "American",
        "flight_count": 1,
        "destination_count": 1
      }
    ]
  },
  {
    "state": "AL",
    "by_carrier": [
      {
        "nickname": "Atlantic Southeast",
        "flight_count": 910,
        "destination_count": 1
      },
      {
        "nickname": "Southwest",
        "flight_count": 836,
        "destination_count": 12
      },
      {
        "nickname": "Delta",
        "flight_count": 587,
        "destination_count": 4
      },
      {
        "nickname": "Continental Express",
        "flight_count": 232,
        "destination_count": 3
      },
      {
        "nickname": "Comair",
        "flight_count": 103,
        "destination_count": 5
      },
      {
        "nickname": "American",
        "flight_count": 45,
        "destination_count": 1
      },
      {
        "nickname": "USAir",
        "flight_count": 28,
        "destination_count": 1
      },
      {
        "nickname": "Continental",
        "flight_count": 15,
        "destination_count": 1
      }
    ]
  },
  {
    "state": "AR",
    "by_carrier": [
      {
        "nickname": "American Eagle",
        "flight_count": 853,
        "destination_count": 3
      },
      {
        "nickname": "Southwest",
        "flight_count": 364,
        "destination_count": 7
      },
      {
        "nickname": "Continental Express",
        "flight_count": 136,
        "destination_count": 3
      },
      {
        "nickname": "Delta",
        "flight_count": 66,
        "destination_count": 2
      },
      {
        "nickname": "American",
        "flight_count": 65,
        "destination_count": 2
      },
      {
        "nickname": "Atlantic Southeast",
        "flight_count": 44,
        "destination_count": 1
      },
      {
        "nickname": "Comair",
        "flight_count": 40,
        "destination_count": 2
      },
      {
        "nickname": "Northwest",
        "flight_count": 13,
        "destination_count": 1
      }
    ]
  },
  {
    "state": "AZ",
    "by_carrier": [
      {
        "nickname": "Southwest",
        "flight_count": 6912,
        "destination_count": 42
      },
      {
        "nickname": "America West",
        "flight_count": 3819,
        "destination_count": 41
      },
      {
        "nickname": "United",
        "flight_count": 703,
        "destination_count": 7
      },
      {
        "nickname": "American",
        "flight_count": 661,
        "destination_count": 6
      },
      {
        "nickname": "Northwest",
        "flight_count": 543,
        "destination_count": 5
      },
      {
        "nickname": "Alaska",
        "flight_count": 381,
        "destination_count": 3
      },
      {
        "nickname": "USAir",
        "flight_count": 316,
        "destination_count": 3
      },
      {
        "nickname": "Delta",
        "flight_count": 90,
        "destination_count": 5
      },
      {
        "nickname": "ATA",
        "flight_count": 42,
        "destination_count": 2
      },
      {
        "nickname": "Continental",
        "flight_count": 37,
        "destination_count": 3
      },
      {
        "nickname": "Continental Express",
        "flight_count": 27,
        "destination_count": 1
      },
      {
        "nickname": "Jetblue",
        "flight_count": 7,
        "destination_count": 1
      }
    ]
  },
  {
    "state": "CA",
    "by_carrier": [
      {
        "nickname": "Southwest",
        "flight_count": 19755,
        "destination_count": 32
      },
      {
        "nickname": "United",
        "flight_count": 6901,
        "destination_count": 37
      },
      {
        "nickname": "American",
        "flight_count": 4687,
        "destination_count": 31
      },
      {
        "nickname": "Alaska",
        "flight_count": 2618,
        "destination_count": 9
      },
      {
        "nickname": "Northwest",
        "flight_count": 2209,
        "destination_count": 10
      },
      {
        "nickname": "America West",
        "flight_count": 2047,
        "destination_count": 4
      },
      {
        "nickname": "Jetblue",
        "flight_count": 910,
        "destination_count": 9
      },
      {
        "nickname": "Delta",
        "flight_count": 749,
        "destination_count": 13
      },
      {
        "nickname": "USAir",
        "flight_count": 542,
        "destination_count": 4
      },
      {
        "nickname": "Continental",
        "flight_count": 105,
        "destination_count": 2
      },
      {
        "nickname": "ATA",
        "flight_count": 78,
        "destination_count": 4
      },
      {
        "nickname": "American Eagle",
        "flight_count": 42,
        "destination_count": 2
      },
      {
        "nickname": "Continental Express",
        "flight_count": 26,
        "destination_count": 2
      },
      {
        "nickname": "Comair",
        "flight_count": 1,
        "destination_count": 1
      }
    ]
  }
]
WITH __stage0 AS (
  SELECT
    group_set,
    origin_0."state" as "state__0",
    CASE WHEN group_set=1 THEN
      carriers_0."nickname"
      END as "nickname__1",
    CASE WHEN group_set=1 THEN
      COUNT( 1)
      END as "flight_count__1",
    CASE WHEN group_set=1 THEN
      COUNT(DISTINCT destination_0."code")
      END as "destination_count__1"
  FROM '../data/flights.parquet' as flights
   LEFT JOIN '../data/airports.parquet' AS destination_0
    ON destination_0."code"=flights."destination"
   LEFT JOIN '../data/airports.parquet' AS origin_0
    ON origin_0."code"=flights."origin"
   LEFT JOIN '../data/carriers.parquet' AS carriers_0
    ON carriers_0."code"=flights."carrier"
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,2,3
)
SELECT
  "state__0" as "state",
  COALESCE(LIST({
    "nickname": "nickname__1", 
    "flight_count": "flight_count__1", 
    "destination_count": "destination_count__1"}  ORDER BY  "flight_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "by_carrier"
FROM __stage0
GROUP BY 1
ORDER BY 1 asc NULLS LAST

Refining Views

A view can be "refined," which means adding clauses such as select:, group_by:, aggregate:, or where:. For example, let's define a source with a view and use that to create a query

document
source: flights4 is duckdb.table('../data/flights.parquet') extend {
  view: top_destinations is {
    group_by: destination
    aggregate: flight_count is count()
  }
}

Running the query gives us flight count by destination:

document
run: flights4 -> top_destinations
QUERY RESULTS
destinationflight_​count
ATL17,832
DFW17,776
ORD14,213
PHX12,477
LAS11,092
[
  {
    "destination": "ATL",
    "flight_count": 17832
  },
  {
    "destination": "DFW",
    "flight_count": 17776
  },
  {
    "destination": "ORD",
    "flight_count": 14213
  },
  {
    "destination": "PHX",
    "flight_count": 12477
  },
  {
    "destination": "LAS",
    "flight_count": 11092
  }
]
SELECT 
   flights4."destination" as "destination",
   COUNT( 1) as "flight_count"
FROM '../data/flights.parquet' as flights4
GROUP BY 1
ORDER BY 2 desc NULLS LAST

Now let's refine it by adding + { group_by: origin }. This adds a group_by clause to the original query

document
run: flights4 -> top_destinations + {
  group_by: origin
}
QUERY RESULTS
destinationflight_​countorigin
LGA2,143DCA
DCA2,123LGA
BOS1,214LGA
LGA1,201BOS
LAS1,073LAX
[
  {
    "destination": "LGA",
    "flight_count": 2143,
    "origin": "DCA"
  },
  {
    "destination": "DCA",
    "flight_count": 2123,
    "origin": "LGA"
  },
  {
    "destination": "BOS",
    "flight_count": 1214,
    "origin": "LGA"
  },
  {
    "destination": "LGA",
    "flight_count": 1201,
    "origin": "BOS"
  },
  {
    "destination": "LAS",
    "flight_count": 1073,
    "origin": "LAX"
  }
]
SELECT 
   flights4."destination" as "destination",
   COUNT( 1) as "flight_count",
   flights4."origin" as "origin"
FROM '../data/flights.parquet' as flights4
GROUP BY 1,3
ORDER BY 2 desc NULLS LAST

The query is now calculating flight_count grouped by both destination and origin.

Refinement can be thought of as similar to extending a class in object-oriented programming. The new view inherits the properties of the original, and adds new properties to it. (That said, it should not be confused with the similar concept of source extension.) This makes query logic much more reusable, since views can be easily saved and modified.

Multi-Stage Views

This example shows a view with 3 stages separated by ->. Each stage generates a CTE in the SQL.

document
run: duckdb.table('../data/flights.parquet') -> {
  select: *
  where: dep_time > @2003
} -> {    
  -- extend: allows you to define fields for use within the view
  extend: { measure: flight_count is count() }   
  aggregate: flight_count
  nest: main_view is {
    group_by: carrier
    aggregate: flight_count
  }
} -> {
  select:
    main_view.carrier
    main_view.flight_count
    # percent
    flight_count_as_a_percent_of_total is main_view.flight_count / flight_count
}
QUERY RESULTS
carrierflight_​countflight_​count_​as_​a_​percent_​of_​total
CO2,2281.59%
TZ2,4611.76%
AS2,7071.93%
HP3,1682.26%
B63,3122.36%
[
  {
    "carrier": "CO",
    "flight_count": 2228,
    "flight_count_as_a_percent_of_total": 0.01589317050204728
  },
  {
    "carrier": "TZ",
    "flight_count": 2461,
    "flight_count_as_a_percent_of_total": 0.017555248027620447
  },
  {
    "carrier": "AS",
    "flight_count": 2707,
    "flight_count_as_a_percent_of_total": 0.019310059492388683
  },
  {
    "carrier": "HP",
    "flight_count": 3168,
    "flight_count_as_a_percent_of_total": 0.022598547643844606
  },
  {
    "carrier": "B6",
    "flight_count": 3312,
    "flight_count_as_a_percent_of_total": 0.023625754354928453
  }
]
WITH __stage0 AS (
  SELECT 
     base."arr_delay" as "arr_delay",
     base."arr_time" as "arr_time",
     base."cancelled" as "cancelled",
     base."carrier" as "carrier",
     base."dep_delay" as "dep_delay",
     base."dep_time" as "dep_time",
     base."destination" as "destination",
     base."distance" as "distance",
     base."diverted" as "diverted",
     base."flight_num" as "flight_num",
     base."flight_time" as "flight_time",
     base."id2" as "id2",
     base."origin" as "origin",
     base."tail_num" as "tail_num",
     base."taxi_in" as "taxi_in",
     base."taxi_out" as "taxi_out"
  FROM '../data/flights.parquet' as base
  WHERE base."dep_time">=TIMESTAMP '2004-01-01 00:00:00'
)
, __stage1 AS (
  SELECT
    group_set,
    CASE WHEN group_set=0 THEN
      COUNT( 1)
      END as "flight_count__0",
    CASE WHEN group_set=1 THEN
      base."carrier"
      END as "carrier__1",
    CASE WHEN group_set=1 THEN
      COUNT( 1)
      END as "flight_count__1"
  FROM __stage0 as base
  CROSS JOIN (SELECT UNNEST(GENERATE_SERIES(0,1,1)) as group_set  ) as group_set
  GROUP BY 1,3
)
, __stage2 AS (
  SELECT
    MAX(CASE WHEN group_set=0 THEN "flight_count__0" END) as "flight_count",
    COALESCE(LIST({
      "carrier": "carrier__1", 
      "flight_count": "flight_count__1"}  ORDER BY  "flight_count__1" desc NULLS LAST) FILTER (WHERE group_set=1),[]) as "main_view"
  FROM __stage1
)
SELECT 
   main_view_0."carrier" as "carrier",
   main_view_0."flight_count" as "flight_count",
   main_view_0."flight_count"*1.0/base."flight_count" as "flight_count_as_a_percent_of_total"
FROM __stage2 as base
LEFT JOIN LATERAL (SELECT UNNEST(base."main_view"), 1 as ignoreme) as main_view_0_outer(main_view_0,ignoreme) ON main_view_0_outer.ignoreme=1