// Clickhouse
SELECT array(1, 2) AS x, toTypeName(x)
+--------+--------------+
│ [1,2] │ Array(UInt8) │
+--------+--------------+
// Clickhouse
SELECT [1, 2] AS x, toTypeName(x)
+--------+--------------+
│ [1,2] │ Array(UInt8)│
+--------+--------------+
// Presto
SELECT numbers, animals, n, a
FROM (
VALUES
(ARRAY[2, 5], ARRAY['dog', 'cat', 'bird']),
(ARRAY[7, 8, 9], ARRAY['cow', 'pig'])
) AS x (numbers, animals)
CROSS JOIN UNNEST(numbers, animals) AS t (n, a);
Doris需要支持Array Literal,可以先只支持其中一种即可。支持ARRAY(xx, xxx, xxx)方式创建Array Literal方式创建数组
需要检查元素类型是否一致
SELECT语法
当前调研的数据库在select查询Array时,主要的展现形式有2种:
Clickhouse、Presto,druid:展示Array数据结构,包括数据,select时没有打平数据
// QUERY:
SELECT * FROM arrays_test
// RESULT:
+----------+----------+
│ Hello │ [1,2] │
│ World │ [3,4,5] │
│ Goodbye │ [] │
+----------+----------+
Impala:不能展示Array数据结构,只能打平Array数据。Impala在select时需要指定ITEM或者POS,所以展示出来并没有相应的Array结构,而是将Array的数据打平成多行展示
// DDL
CREATE TABLE complex_array (country STRING, city ARRAY <STRING>) STORED AS PARQUET;
// QUERY
SELECT country, city.item FROM complex_array, complex_array.city;
// RESULT
+---------+-------------+
| country | item |
+---------+-------------+
| Canada | Toronto |
| France | Paris |
| France | Nice |
| France | Marseilles |
| France | Cannes |
| Greece | Athens |
| Greece | Piraeus |
+---------+-------------+
Doris优先支持Clickhouse,Presto类似的方式,直接展示数据结构,打平数据通过函数UNNEST支持
类似Impala的item、pos方式暂时不考虑支持
逻辑运算符(or, and, not)
Clickhouse,Presto不能直接使用array列进行逻辑运算,都是通过使用Array相关的函数做逻辑判断
Druid定义了一系列规则,支持直接在Array列上直接处理逻辑子句
Impala需要在查询时指明是item或pos,也支持用item和pos计算,等同于单值列
相比较与单独定义逻辑子句规则,实现不同的算子扩展性会更好一点,所以这里Doris以第一种方式支持。
Join子句
支持Join的方式:
Clickhouse需要使用Array Join语法打平Array数据,也支持使用ArrayMap函数打平数据
Presto中需要使用UNNEST函数打平Array数据
Impala由于在查询时已经打平数据,支持直接join
Druid没有明确表示支持join array
暂时不支持
Group By
Clickhouse不支持array列Group By
Druid支持array列group by,先将数据打平再做聚合
impala支持,操作等同于单值列
presto未明确是否支持Array列
presto未明确是否支持Array列
Clickhouse支持多种数据导入格式,CSV/TSV的Array导入格式规则:[xxx,xxx,...]
Impala不支持直接导入复杂类型,可以用hive已经生成的ORC/Parquet数据
Druid支持格式较多,可以自己在spec中定义listDelimiter(默认为ctrl+A)
对于CSV,TSV上可以参考Clickhouse的导入格式,支持规则为[xxx${listDelimiter}xxx${listDelimiter}xxx]
, listDelimiter默认为,
对于ORCfile,Parquet已经自带支持嵌套类型
存储上,这里调研的数据库存储方式大同小异,对于Array都是分为两列数据存储:offset列和数据列。相同的处理方式还有对string的存储
Click,Presto-ORCfile:
*offset列:存储ArraySize,读取时需要叠加ArraySize计算offset
数据列:按照具体类型序列化
Doris String:
offset列:存储String的offset,读取时需要读取前一个String的offset,计算size
数据列:string做字典压缩
1: required string column_name
2: required Types.TColumnType column_type
3: optional Types.TAggregationType aggregation_type
4: optional bool is_key
5: optional bool is_allow_null
6: optional string default_value
7: optional bool is_bloom_filter_column
// for nested type
// array name: value
8: optional list<TColumn> children_column //新增字段
enum TPrimitiveType {
INVALID_TYPE,
// ......
OBJECT,
ARRAY// 新增类型
内存结构,由于Column新增了children Column,需要修改序列化的功能
注意考虑元数据的前后兼容性!
public class Column implements Writable {
private static final Logger LOG = LogManager.getLogger(Column.class);
private static final String COLUMN_ARRAY_CHILDREN = "item";
@SerializedName(value = "name")
private String name;
@SerializedName(value = "type")
private Type type;
// 新增字段
@SerializedName(value = "name")
private List<Column> children;
// .....
SELECT
运算符&SELECT子句
JOIN/GROUP BY/ORDER BY/DISTINCT子句 暂时不支持 Array列,需要在相关SQL子句的analyze方法中是否支持array列
暂时不支持 Array列直接使用逻辑运算符 和 算术运算符,算子需要检查Array列
支持展示Array数据结构:
// QUERY:
SELECT * FROM arrays_test
// RESULT:
+----------+----------+
│ Hello │ [1,2] │
│ World │ [3,4,5] │
│ Goodbye │ [] │
+----------+----------+
参考Impala,嵌套类型的数据可以用同一个数据结构表示。
Tuple中数据结构:
Strut collection {
// 数组⻓度
size_t length;
// null bitmap
uint8_t* null_bitmap_data;
// 元素数据AnyVal数据
void* data
参考impala,嵌套类型的数据可以统一为一种数据结构,这里在Tuple转换为Val时不做具体类型的数据转换。在进行具体算子计算时,再根据子类型转换数据类型。为了方便数据处理(可能需要递归),这里增加一个iterator模板,用于方便转化嵌套类型数据。
// BE CollectionVal
struct CollectionVal : AnyVal {
size_t len;
uint8_t* null_bitmap;
void* data;
CollectionValIterator iterater(TypeDescriptor type) {
return CollectionValIterator(type, this);
class CollectionValIterator {
public:
CollectionValIterator(TypeDescriptor type, CollectionVal* data) {
type_size = any_val_size(type);
data = data;
bool seek(int n) {
offset = n;
bool next() {
if (offset < data.len) {
offset ++;
return true;
return false;
// 返回子元素指针
void* data() {
return data + type_size * offset;
// 按照特定类型读取数据
template <typename T>
void read(T* dst);
protected:
size_t offset;
size_t type_size;
CollectionVal data;
Description
Doris用于描述RowBatch的TupleDescription、SlotDescription中包括TypeDescription,TypeDescription已经支持了树形结构的嵌套类型(包括thfit中的TTypeNode),可以直接复用结构。
struct TypeDescriptor {
PrimitiveType type;
// ......
// 深度优先的type list
/// Empty for scalar types
std::vector<TypeDescriptor> children;
/// Only set if type == TYPE_STRUCT. The field name of each child.
std::vector<std::string> field_names;
// ......
// TTypeNode -> TypeDescription转换
TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx) :
len(-1), precision(-1), scale(-1) {
DCHECK_GE(*idx, 0);
DCHECK_LT(*idx, types.size());
const TTypeNode& node = types[*idx];
switch (node.type) {
case TTypeNodeType::SCALAR: {
// ......
break;
case TTypeNodeType::ARRAY:
DCHECK(!node.__isset.scalar_type);
DCHECK_LT(*idx, types.size() - 1);
type = TYPE_ARRAY;
++(*idx);
children.push_back(TypeDescriptor(types, idx));
break;
// ......
Array Literal
支持语法:
SELECT array(1, 2) AS x;
SELECT array("1", "2") AS x;
FE上需要支持ArrayLiteral
public class ArrayLiteral extends LiteralExpr {
private List<LiteralExpr> value;
public ArrayLiteral(List<LiteralExpr> data) {
value.addAll(data);
E cup/jflex做语法解析时传递字符串创建比较复杂,这里通过在FE上内置一个array()的算子实现。
需要注意:由于Array的传参个数不定,当前FE查找内置function的方式(定长的参数列表、固定的参数类型)是无法支持Array()算子的,需要在FunctionCallExpr的analyzeImpl方法中,针对嵌套类型的算子单独处理
public class FEFunctions {
// ....
@FEFunction(name = "array", argTypes = {"LIST"}, returnType = "ARRAY")
public static LiteralExpr array(List<LiteralExpr> list) {
// 检查元素类型
checkType(list);
return new ArrayLiteral(list);
FE传递ArrayLiteral到BE时,thift上没有直接的ArrayLiteral结构,可以复用ExprNode的层级关系表示,最终需要在get_collection_val()时,将_collection_value数组转换为collection_val格式
class Literal : public Expr {
public:
// ....
private:
ExprValue _value;
std::vector<Literal> _collection_value; //新增
Literal::Literal(const TExprNode& node) :
Expr(node) {
switch (_type.type) {
case TYPE_BOOLEAN:
// ......
case TYPE_TINYINT:
// ......
case TYPE_ARRAY: {
DCHECK_EQ(node.node_type, TExprNodeType::ARRAY_LITERAL;
DCHECK(node.__isset.array_literal);
for (int i = 0; i < this->children().size(); ++i) {
_collection_value.push_back(Literal(this->children()[i]));
break;
default:
break;
// DCHECK(false) << "Invalid type: " << TypeToString(_type.type);
由于不支持其他子句,主要涉及修改的Expr: SlotRef,Literal,ScalarFnCall(算子),result_writer
SlotRef:直接读取tuple数据转为CollectionVal,不做特殊处理。
// SlotRef
CollectionVal SlotRef::get_collection_val(ExprContext *context, TupleRow * row) {
DCHECK(_type.is_array_type());
Tuple* t = row->get_tuple(_tuple_idx);
if (t == NULL || t->is_null(_null_indicator_offset)) {
return CollectionVal::null();
return CollectionVal(t->get_slot(_slot_offset));
ScalarFnCall:当前算子的框架下参数的Type不是树形结构,需要增加children list,用于支持树形的Type结构。具体算子待实现。
// udf.h
struct TypeDesc {
Type type;
std::vector<TypeDesc> children; // 新增字段
/// Only valid if type == TYPE_DECIMAL
int precision;
int scale;
/// Only valid if type == TYPE_FIXED_BUFFER || type == TYPE_VARCHAR
int len;
FE执行规划时需要检查嵌套数据子类型和返回值类型是否一致。
result_write:result_write需要重构add_one_row方法,将具体类型数据的添加抽象出来,以便于嵌套结构递归调用。
// result_write
Status ResultWriter::add_one_row(TupleRow* row) {
_row_buffer->reset();
int num_columns = _output_expr_ctxs.size();
int buf_ret = 0;
for (int i = 0; 0 == buf_ret && i < num_columns; ++i) {
void* item = _output_expr_ctxs[i]->get_value(row);
if (NULL == item) {
buf_ret = _row_buffer->push_null();
continue;
add_value(_output_expr_ctxs[i]->root()->type().type, item);
if (0 != buf_ret) {
return Status::InternalError("pack mysql buffer failed.");
return Status::OK();
int ResultWriter::add_value(TypeDescriptor type, void* item) {
int buf_ret = 0;
if (NULL == item) {
buf_ret = _row_buffer->push_null();
switch (type) {
case TYPE_BOOLEAN:
case TYPE_TINYINT:
buf_ret = _row_buffer->push_tinyint(*static_cast<int8_t*>(item));
break;
// .......
case TYPE_ARRAY: {
_row_buffer->push_string("[", 1);
CollectionValIter iter = ((CollectionVal) item).iterator();
while (iter.next()) {
add_value(type.children[0], iter.data());
_row_buffer->push_string(",", 1);
_row_buffer->push_string("]", 1);
// .......
对于CSV,TSV,支持规则为[xxx${listDelimiter}xxx${listDelimiter}xxx]
, listDelimiter默认为,
对于ORCfile,Parquet已经自带支持嵌套类型
#2885
Main include:
* FE SQL syntax (Create table) support array type column, like 'name Array<int> not null'
* FE SQL syntax support ArrayLiteral, like 'select array(1, 2, 3, 4)'
* FE update meta: Columns meta support array type, include meta serialized and deserialized, and add flags in FEMetaVersions for compatibility
* FE support ArrayLiteral: Mainly support array related function, and the array constucted function support sub-element type auto-cast.
* BE update result buffer, support dynamic mode for output variable length data type.
* BE support ArrayLiteral and array(*) functions.
Main include:
* FE SQL syntax (Create table) support array type column, like 'name Array<int> not null'
* FE SQL syntax support ArrayLiteral, like 'select array(1, 2, 3, 4)'
* FE update meta: Columns meta support array type, include meta serialized and deserialized, and add flags in FEMetaVersions for compatibility
* FE support ArrayLiteral: Mainly support array related function, and the array constucted function support sub-element type auto-cast.
* BE update result buffer, support dynamic mode for output variable length data type.
* BE support ArrayLiteral and array(*) functions.
And other todo:
* BE meta udpate
* BE transfer data from storage to tuple
* BE support array type related function(hash, equal.....)
* BE support array type data import
* other....(many related code, such as print value, value copy...)
Main include:
* FE SQL syntax (Create table) support array type column, like 'name Array<int> not null'
* FE SQL syntax support ArrayLiteral, like 'select array(1, 2, 3, 4)'
* FE update meta: Columns meta support array type, include meta serialized and deserialized, and add flags in FEMetaVersions for compatibility
* FE support ArrayLiteral: Mainly support array related function, and the array constucted function support sub-element type auto-cast.
* BE update result buffer, support dynamic mode for output variable length data type.
* BE support ArrayLiteral and array(*) functions.
And other todo:
* BE meta udpate
* BE transfer data from storage to tuple
* BE support array type related function(hash, equal.....)
* BE support array type data import
* other....(many related code, such as print value, value copy...)
Main include:
* FE SQL syntax (Create table) support array type column, like 'name Array<int> not null'
* FE SQL syntax support ArrayLiteral, like 'select array(1, 2, 3, 4)'
* FE update meta: Columns meta support array type, include meta serialized and deserialized, and add flags in FEMetaVersions for compatibility
* FE support ArrayLiteral: Mainly support array related function, and the array constucted function support sub-element type auto-cast.
* BE update result buffer, support dynamic mode for output variable length data type.
* BE support ArrayLiteral and array(*) functions.
And other todo:
* BE meta udpate
* BE transfer data from storage to tuple
* BE support array type related function(hash, equal.....)
* BE support array type data import
* other....(many related code, such as print value, value copy...)
ArrayType will cause some sub-query to failed. The reason is that
the ArrayType will used in subquery, and the ArrayType mean mutli-rows
but not a scalar-value in subquery's context.
[array][be] Support array type 1/4. add array to tablet_meta, add array expr/function, mysql buffer
#4643
[array][be] Support array type 3/4. Be exec scan array, insert array and sink array type.
#4650