Milvus allows you to store and index structured data within a single field using the
JSON
data type. This enables flexible schemas with nested attributes while still allowing efficient filtering via JSON path indexing.
A JSON field is a schema-defined field in Milvus that stores structured key-value data. The values can include strings, numbers, booleans, arrays, or deeply nested objects.
Here’s an example of what a JSON field might look like in a document:
"metadata"
:
{
"category"
:
"electronics"
,
"brand"
:
"BrandA"
,
"in_stock"
:
true
,
"price"
:
99.99
,
"string_price"
:
"99.99"
,
"tags"
:
[
"clearance"
,
"summer_sale"
]
,
"supplier"
:
{
"name"
:
"SupplierX"
,
"country"
:
"USA"
,
"contact"
:
{
"email"
:
"[email protected]"
,
"phone"
:
"+1-800-555-0199"
In this example:
metadata
is the JSON field defined in the schema.
You can store flat values (e.g.
category
,
in_stock
), arrays (
tags
), and nested objects (
supplier
).
To use a JSON field, explicitly define it in the collection schema by specifying the
DataType
as
JSON
.
The example below creates a collection with its schema containing these fields:
The primary key (
product_id
)
A
vector
field (mandatory for each collection)
A
metadata
field of type
JSON
, which can store structured data like flat values, arrays, or nested objects
Python
NodeJS
from pymilvus import MilvusClient, DataType
client = MilvusClient(uri="http://localhost:19530")
schema = client.create_schema(auto_id=False, enable_dynamic_field=True)
schema.add_field(field_name="product_id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=5)
schema.add_field(field_name="metadata", datatype=DataType.JSON, nullable=True)
client.create_collection(
collection_name="product_catalog",
schema=schema
import io.milvus.v2.client.*;
import io.milvus.v2.service.collection.request.CreateCollectionReq;
import io.milvus.v2.service.collection.request.AddFieldReq;
ConnectConfig config = ConnectConfig.builder()
.uri("http://localhost:19530")
.build();
MilvusClientV2 client = new MilvusClientV2(config);
CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder()
.enableDynamicField(true)
.build();
schema.addField(AddFieldReq.builder()
.fieldName("product_id")
.dataType(DataType.Int64)
.isPrimaryKey(Boolean.TRUE)
.build());
schema.addField(AddFieldReq.builder()
.fieldName("vector")
.dataType(DataType.FloatVector)
.dimension(5)
.build());
schema.addField(AddFieldReq.builder()
.fieldName("metadata")
.dataType(DataType.JSON)
.isNullable(true)
.build());
CreateCollectionReq requestCreate = CreateCollectionReq.builder()
.collectionName("product_catalog")
.collectionSchema(schema)
.build();
client.createCollection(requestCreate);
import { MilvusClient, DataType } from '@zilliz/milvus2-sdk-node';
const client = new MilvusClient({
address: 'localhost:19530'
await client.createCollection({
collection_name: "product_catalog",
fields: [
name: "product_id",
data_type: DataType.Int64,
is_primary_key: true,
autoID: false
name: "vector",
data_type: DataType.FloatVector,
dim: 5
name: "metadata",
data_type: DataType.JSON,
nullable: true
enable_dynamic_field: true
import (
"context"
"github.com/milvus-io/milvus/client/v2/entity"
"github.com/milvus-io/milvus/client/v2/milvusclient"
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
client, err := milvusclient.New(ctx, &milvusclient.ClientConfig{
Address: "localhost:19530",
if err != nil {
return err
schema := entity.NewSchema().WithDynamicFieldEnabled(true)
schema.WithField(entity.NewField().
WithName("product_id").
WithDataType(entity.FieldTypeInt64).
WithIsPrimaryKey(true),
).WithField(entity.NewField().
WithName("vector").
WithDataType(entity.FieldTypeFloatVector).
WithDim(5),
).WithField(entity.NewField().
WithName("metadata").
WithDataType(entity.FieldTypeJSON).
WithNullable(true),
err = client.CreateCollection(ctx, milvusclient.NewCreateCollectionOption("product_catalog", schema))
if err != nil {
return err
export TOKEN="root:Milvus"
export CLUSTER_ENDPOINT="http://localhost:19530"
export productIdField='{
"fieldName": "product_id",
"dataType": "Int64",
"isPrimary": true,
"autoID": false
export vectorField='{
"fieldName": "vector",
"dataType": "FloatVector",
"typeParams": {
"dim": 5
export metadataField='{
"fieldName": "metadata",
"dataType": "JSON",
"isNullable": true
export schema="{
\"autoID\": false,
\"enableDynamicField\": true,
\"fields\": [
$productIdField,
$vectorField,
$metadataField
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
--data "{
\"collectionName\": \"product_catalog\",
\"schema\": $schema
You can also enable the dynamic field feature to store undeclared fields flexibly, but it’s not required for JSON fields to function. For more information, refer to Dynamic Field.
Once the collection is created, insert entities that contain structured JSON objects in the metadata
JSON field.
Python
NodeJS
entities = [
"product_id": 1,
"vector": [0.1, 0.2, 0.3, 0.4, 0.5],
"metadata": {
"category": "electronics",
"brand": "BrandA",
"in_stock": True,
"price": 99.99,
"string_price": "99.99",
"tags": ["clearance", "summer_sale"],
"supplier": {
"name": "SupplierX",
"country": "USA",
"contact": {
"email": "[email protected]",
"phone": "+1-800-555-0199"
client.insert(collection_name="product_catalog", data=entities)
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import io.milvus.v2.service.vector.request.InsertReq;
Gson gson = new Gson();
JsonObject row = new JsonObject();
row.addProperty("product_id", 1);
row.add("vector", gson.toJsonTree(Arrays.asList(0.1, 0.2, 0.3, 0.4, 0.5)));
JsonObject metadata = new JsonObject();
metadata.addProperty("category", "electronics");
metadata.addProperty("brand", "BrandA");
metadata.addProperty("in_stock", true);
metadata.addProperty("price", 99.99);
metadata.addProperty("string_price", "99.99");
metadata.add("tags", gson.toJsonTree(Arrays.asList("clearance", "summer_sale")));
JsonObject supplier = new JsonObject();
supplier.addProperty("name", "SupplierX");
supplier.addProperty("country", "USA");
JsonObject contact = new JsonObject();
contact.addProperty("email", "[email protected]");
contact.addProperty("phone", "+1-800-555-0199");
supplier.add("contact", contact);
metadata.add("supplier", supplier);
row.add("metadata"
, metadata);
client.insert(InsertReq.builder()
.collectionName("product_catalog")
.data(Collections.singletonList(row))
.build());
const entities = [
"product_id": 1,
"vector": [0.1, 0.2, 0.3, 0.4, 0.5],
"metadata": {
"category": "electronics",
"brand": "BrandA",
"in_stock": true,
"price": 99.99,
"string_price": "99.99",
"tags": ["clearance", "summer_sale"],
"supplier": {
"name": "SupplierX",
"country": "USA",
"contact": {
"email": "[email protected]",
"phone": "+1-800-555-0199"
await client.insert({
collection_name: "product_catalog",
data: entities
_, err = client.Insert(ctx, milvusclient.NewColumnBasedInsertOption("product_catalog").
WithInt64Column("product_id", []int64{1}).
WithFloatVectorColumn("vector", 5, [][]float32{
{0.1, 0.2, 0.3, 0.4, 0.5},
}).WithColumns(
column.NewColumnJSONBytes("metadata", [][]byte{
[]byte(`{
"category": "electronics",
"brand": "BrandA",
"in_stock": true,
"price": 99.99,
"string_price": "99.99",
"tags": ["clearance", "summer_sale"],
"supplier": {
"name": "SupplierX",
"country": "USA",
"contact": {
"email": "[email protected]",
"phone": "+1-800-555-0199"
}`),
if err != nil {
return err
export TOKEN="root:Milvus"
export CLUSTER_ENDPOINT="http://localhost:19530"
export entities='[
"product_id": 1,
"vector": [0.1, 0.2, 0.3, 0.4, 0.5],
"metadata": {
"category": "electronics",
"brand": "BrandA",
"in_stock": true,
"price": 99.99,
"string_price": "99.99",
"tags": ["clearance", "summer_sale"],
"supplier": {
"name": "SupplierX",
"country": "USA",
"contact": {
"email": "[email protected]",
"phone": "+1-800-555-0199"
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/product_catalog/insert" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
--data "{
\"data\": $entities
To accelerate scalar filtering on JSON fields, Milvus supports indexing JSON fields using JSON path indexing. This allows you to filter by keys or nested values inside a JSON object without scanning the entire field.
Indexing JSON fields is optional. You can still query or filter by JSON paths without an index, but it may result in slower performance due to brute-force search.
To create a JSON path index, specify:
JSON path (json_path
): The path to the key or nested field within your JSON object that you want to index.
Example: metadata["category"]
This defines where the indexing engine should look inside the JSON structure.
JSON cast type (json_cast_type
): The data type that Milvus should use when interpreting and indexing the value at the specified path.
This type must match the actual data type of the field being indexed. If you want to convert the data type to another during indexing, consider using a cast function.
For a complete list, see below.
Cast types are case-insensitive. The following types are supported:
Cast Type
Description
Example JSON Value
bool
Boolean value
true
, false
double
Numeric value (integer or float)
42
, 99.99
, -15.5
varchar
String value
"electronics"
, "BrandA"
array_bool
Array of booleans
[true, false, true]
array_double
Array of numbers
[1.2, 3.14, 42]
array_varchar
Array of strings
["tag1", "tag2", "tag3"]
Arrays should contain elements of the same type for optimal indexing. For more information, refer to Array Field.
Using the metadata
JSON structure from our introduction, here are examples of how to create indexes on different JSON paths:
Python
NodeJS
index_params = client.prepare_index_params()
index_params.add_index(
field_name="metadata",
index_type="AUTOINDEX",
index_name="category_index",
params={
"json_path": "metadata[\"category\"]",
"json_cast_type": "varchar"
}
)
index_params.add_index(
field_name="metadata",
index_type="AUTOINDEX",
index_name="tags_array_index",
params={
"json_path": "metadata[\"tags\"]",
"json_cast_type": "array_varchar"
}
)
import io.milvus.v2.common.IndexParam;
Map<String,Object> extraParams1 = new HashMap<>();
extraParams1.put("json_path", "metadata[\"category\"]");
extraParams1.put("json_cast_type", "varchar");
indexParams.add(IndexParam.builder()
.fieldName("metadata")
.indexName("category_index")
.indexType(IndexParam.IndexType.AUTOINDEX)
.extraParams(extraParams1)
.build());
Map<String,Object> extraParams2 = new HashMap<>();
extraParams2.put("json_path", "metadata[\"tags\"]");
extraParams2.put("json_cast_type", "array_varchar");
indexParams.add(IndexParam.builder()
.fieldName("metadata")
.indexName("tags_array_index")
.indexType(IndexParam.IndexType.AUTOINDEX)
.extraParams(extraParams2)
.build());
const indexParams = [
collection_name: "product_catalog",
field_name: "metadata",
index_name: "category_index",
index_type: "AUTOINDEX",
extra_params: {
json_path: 'metadata["category"]',
json_cast_type: "varchar",
collection_name: "product_catalog",
field_name: "metadata",
index_name: "tags_array_index",
index_type: "AUTOINDEX",
extra_params: {
json_path: 'metadata["tags"]',
json_cast_type: "array_varchar",
import (
"github.com/milvus-io/milvus/client/v2/index"
jsonIndex1 := index.NewJSONPathIndex(index.AUTOINDEX, "varchar", `metadata["category"]`).
WithIndexName("category_index")
jsonIndex2 := index.NewJSONPathIndex(index.AUTOINDEX, "array_varchar", `metadata["tags"]`).
WithIndexName("tags_array_index")
indexOpt1 := milvusclient.NewCreateIndexOption("product_catalog", "metadata", jsonIndex1)
indexOpt2 := milvusclient.NewCreateIndexOption("product_catalog", "metadata", jsonIndex2)
export categoryIndex='{
"fieldName": "metadata",
"indexName": "category_index",
"params": {
"index_type": "AUTOINDEX",
"json_path": "metadata[\\\"category\\\"]",
"json_cast_type": "varchar"
export tagsArrayIndex='{
"fieldName": "metadata",
"indexName": "tags_array_index",
"params": {
"index_type": "AUTOINDEX",
"json_path": "metadata[\\\"tags\\\"]",
"json_cast_type": "array_varchar"
If your JSON field key contains values in an incorrect format (e.g., numbers stored as strings), you can use cast functions to convert values during indexing.
Cast functions are case-insensitive. The following functions are supported:
Cast Function
Converts From → To
Use Case
"STRING_TO_DOUBLE"
String → Numeric (double)
Convert "99.99"
to 99.99
Python
NodeJS
index_params.add_index(
field_name="metadata",
index_type="AUTOINDEX",
index_name="string_to_double_index",
params={
"json_path": "metadata[\"string_price\"]",
"json_cast_type": "double",
"json_cast_function": "STRING_TO_DOUBLE"
Map<String,Object> extraParams3 = new HashMap<>();
extraParams3.put("json_path", "metadata[\"string_price\"]");
extraParams3.put("json_cast_type", "double");
extraParams3.put("json_cast_function", "STRING_TO_DOUBLE");
indexParams.add(IndexParam.builder()
.fieldName("metadata")
.indexName("string_to_double_index")
.indexType(IndexParam.IndexType.AUTOINDEX)
.extraParams(extraParams3)
.build());
indexParams.push({
collection_name: "product_catalog",
field_name: "metadata",
index_name: "string_to_double_index",
index_type: "AUTOINDEX",
extra_params: {
json_path: 'metadata["string_price"]',
json_cast_type: "double",
json_cast_function: "STRING_TO_DOUBLE",
jsonIndex3 := index.NewJSONPathIndex(index.AUTOINDEX, "double", `metadata["string_price"]`).
WithIndexName("string_to_double_index")
indexOpt3 := milvusclient.NewCreateIndexOption("product_catalog", "metadata", jsonIndex3)
export stringToDoubleIndex='{
"fieldName": "metadata",
"indexName": "string_to_double_index",
"params": {
"index_type": "AUTOINDEX",
"json_path": "metadata[\\\"string_price\\\"]",
"json_cast_type": "double",
"json_cast_function": "STRING_TO_DOUBLE"
The json_cast_type
parameter is mandatory and must be the same as the cast function’s output type.
If conversion fails (e.g., non-numeric string), the value is skipped and not indexed.
After defining the index parameters, you can apply them to the collection using create_index()
:
Python
NodeJS
client.create_index(
collection_name="product_catalog",
index_params=index_params
import io.milvus.v2.service.index.request.CreateIndexReq;
client.createIndex(CreateIndexReq.builder()
.collectionName("product_catalog")
.indexParams(indexParams)
.build());
await client.createIndex(indexParams)
indexTask1, err := client.CreateIndex(ctx, indexOpt1)
if err != nil {
return err
indexTask2, err := client.CreateIndex(ctx, indexOpt2)
if err != nil {
return err
indexTask3, err := client.CreateIndex(ctx, indexOpt3)
if err != nil {
return err
export indexParams="[
$categoryIndex,
$tagsArrayIndex,
$stringToDoubleIndex
curl --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/indexes/create" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
--data "{
\"collectionName\": \"product_catalog\",
\"indexParams\": $indexParams
After inserting and indexing JSON fields, you can filter on them using standard filter expressions with JSON path syntax.
For example:
Python
NodeJS
filter = 'metadata["category"] == "electronics"'
filter = 'metadata["price"] > 50'
filter = 'json_contains(metadata["tags"], "featured")'
String filter = "metadata[\"category\"] == \"electronics\"";
String filter = "metadata[\"price\"] > 50";
String filter = "json_contains(metadata[\"tags\"], \"featured\")";
let filter = 'metadata["category"] == "electronics"'
let filter = 'metadata["price"] > 50'
let filter = 'json_contains(metadata["tags"], "featured")'
filter := `metadata["category"] == "electronics"`
filter := `metadata["price"] > 50`
filter := `json_contains(metadata["tags"], "featured")`
export filterCategory='metadata["category"] == "electronics"'
export filterPrice='metadata["price"] > 50'
export filterTags='json_contains(metadata["tags"], "featured")'
To use these expressions in a search or query, make sure:
You have created an index on each vector field.
The collection is loaded into memory.
For a full list of supported operators and expressions, refer to JSON Operators.
By now, you’ve learned how to define, insert, and optionally index structured values inside a JSON field.
To complete the workflow in a real-world application, you’ll also need to:
Create an index on your vector fields (mandatory for each vector field in a collection)
Refer to Set Index Parameters
Load the collection
Refer to Load & Release
Search or query using JSON path filters
Refer to Filtered Search and JSON Operators
What are the differences between a JSON field and the dynamic field?
JSON field is schema-defined. You must explicitly declare the field in the schema.
Dynamic field is a hidden JSON object ($meta
) that automatically stores any field not defined in the schema.
Both support nested structures and JSON path indexing, but dynamic fields are more suitable for optional or evolving data structures.
Refer to Dynamic Field for details.
Are there any size limitations for a JSON field?
Yes. Each JSON field is limited to 65,536 bytes.
Does a JSON field support setting a default value?
No, JSON fields do not support default values. However, you can set nullable=True
when defining the field to allow empty entries.
Refer to Nullable & Default for details.
Are there any naming conventions for JSON field keys?
Yes, to ensure compatibility with queries and indexing:
Use only letters, numbers, and underscores in JSON keys.
Avoid using special characters, spaces, or separators such as dots and slashes (for example, . and /).
Incompatible keys may cause parsing issues in filter expressions.
How does Milvus handle string values in JSON fields?
Milvus stores string values exactly as they appear in the JSON input—without semantic transformation. Improperly quoted strings may result in errors during parsing.
Examples of valid strings:
"a\"b", "a'b", "a\\b"
Examples of invalid strings:
'a"b', 'a\'b'
How does Milvus apply JSON path indexes when evaluating filter conditions?
Numeric Indexing:
If an index is created with json_cast_type="double"
, only numeric filter conditions (e.g., >
, <
, == 42
) will leverage the index. Non-numeric conditions may fall back to a brute-force scan.
String Indexing:
If an index uses json_cast_type="varchar"
, only string filter conditions will benefit from the index; other types may fall back to a brute-force scan.
Boolean Indexing:
Boolean indexing behaves similarly to string indexing, with index usage only when the condition strictly matches true or false.