Problem with knn query in joins

I’m using the siren-federate plugin and I’m running into an issue when executing a KNN query inside a join.

Query

POST siren/main_index/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "join": {
            "indices": ["refIndex"],
            "on": ["id", "refId"],
            "request": {
              "query": {
                "bool": {
                  "must": [
                    {
                      "knn": {
                        "field": "embedding",
                        "query_vector": [ -0.0457, 0.0562, ... ],
                        "k": 5,
                        "filter": []
                      }
                    }
                  ],
                  "must_not": [
                    { "ids": { "values": ["123"] } }
                  ]
                }
              },
              "project": [
                { "field": { "name": "_score", "alias": "knn_score" } }
              ]
            }
          }
        },
        {
          "function_score": {
            "functions": [
              {
                "field_value_factor": {
                  "field": "knn_score",
                  "factor": 1,
                  "missing": 1
                }
              }
            ]
          }
        }
      ]
    }
  },
  "_source": ["*"],
  "min_score": 0.5,
  "size": 10,
  "track_total_hits": true,
  "sort": [
    { "_score": { "order": "desc" } }
  ]
}

Error

The request returns a 500 error with the following warning in the Elasticsearch logs:

{
  "@timestamp": "2025-12-11T15:27:41.125Z",
  "log.level": "WARN",
  "message": "path: /siren/main_index/_search, params: {index=main_index}, status: 500",
  "ecs.version": "1.2.0",
  "service.name": "ES_ECS",
  "event.dataset": "elasticsearch.server",
  "process.thread.name": "elasticsearch[elasticsearch01][federate.planner][T#54]",
  "log.logger": "rest.suppressed",
  "elasticsearch.cluster.uuid": "leQFsbc4StiY-TDgdKCgGA",
  "elasticsearch.node.id": "3AQi29SUSGy97ZX5fatcWA",
  "elasticsearch.node.name": "elasticsearch01",
  "elasticsearch.cluster.name": "es-docker-cluster",
  "error.type": "java.lang.NullPointerException",
  "error.message": "Cannot invoke \"java.lang.Integer.intValue()\" because \"this.e\" is null",
  "error.stack_trace": "java.lang.NullPointerException: Cannot invoke \"java.lang.Integer.intValue()\" because \"this.e\" is null
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.d$h.hash(CompoundOperatorExpressionTreeNode.java:779)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.b.b.b.a(Hasher.java:205)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.b.b.b.a(Hasher.java:190)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.d.a(CompoundOperatorExpressionTreeNode.java:81)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.d$h.a(CompoundOperatorExpressionTreeNode.java:720)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.A.a(UnaryOperatorExpressionTreeNode.java:127)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.A.a(UnaryOperatorExpressionTreeNode.java:24)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.d.a(CompoundOperatorExpressionTreeNode.java:83)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.d$a.a(CompoundOperatorExpressionTreeNode.java:146)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.A.a(UnaryOperatorExpressionTreeNode.java:127)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.expr.A.a(UnaryOperatorExpressionTreeNode.java:24)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.b.b.b.a(Hasher.java:109)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.n$a.hash(LogicalTableFunctionScan.java:250)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.b.b.b.a(Hasher.java:87)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.n$b.hash(LogicalTableFunctionScan.java:301)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.b.b.b.a(Hasher.java:87)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.h$a.hash(LogicalJoin.java:294)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.b.b.b.a(Hasher.java:87)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.b.b.b.a(Hasher.java:58)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.g$1.a(LogicalHashMapper.java:59)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1724)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.g$1.a(LogicalHashMapper.java:59)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.h.a(LogicalJoin.java:200)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.f.a(LogicalExchange.java:52)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.n.a(LogicalTableFunctionScan.java:200)
	at com.google.common@32.1.3-jre/com.google.common.collect.ImmutableList.forEach(ImmutableList.java:422)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.n.a(LogicalTableFunctionScan.java:200)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.f.a(LogicalExchange.java:52)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.g.a(LogicalHashMapper.java:45)
	at io.siren.federate.core@8.19.8-38.5/io.siren.federate.core.planner.aqp.d.k.a(LogicalQueryTreeService.java:133)
	at java.base/java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:1171)
	at java.base/java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:503)
	at org.elasticsearch.server@8.19.8/org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:977)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1090)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:614)
	at java.base/java.lang.Thread.run(Thread.java:1474)
"
}

Question

Has anyone seen this issue when running a knn query inside a join?
Is this a known limitation or a bug in this version of siren-federate?

Hello,

Try this knn query with num_candidates; we will get that fixed in the next release.

"knn": {
  "field": "embedding",
  "query_vector": [ -0.0457, 0.0562, ... ],
  "num_candidates": 10,
  "k": 5,
  "filter": []
}

Thanks

Thanks, this worked
However, there’s one issue: even though I removed the 10k limit and can set k and num_candidates to 50k outside of the join, it still doesn’t work inside the join and the maximum remains 10k.

I have to return 50k, I know it might not be standard, but it’s necessary for my current needs

Can you share the query you tried, to illustrate the issue you described?

I’m able to run this kNN query without any problem when it’s executed outside the join:

POST siren/vectors/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "knn": {
            "field": "vector",
            "query_vector": [ -0.01085220742970705, 0.010437008924782276, ... ],
            "k": 50000,
            "num_candidates": 50000,
            "filter": [
              {
                "bool": {
                  "must_not": [
                    { "ids": { "values": ["123"] } }
                  ]
                }
              }
            ]
          }
        }
      ]
    }
  },
  "min_score": 0.7,
  "size": 25,
  "sort": [
    { "_score": { "order": "desc" } }
  ],
  "_source": { "includes": ["id"] },
  "track_total_hits": true
}

But when I run the same query inside a join, it doesn’t work.
So the problem isn’t the query itself — the issue is that the join context doesn’t seem to support the 50K value for k inside it.

this works with 10k not 50k

POST siren/user/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "join": {
            "indices": ["vectors"],
            "on": ["id", "id"],
            "request": {
              "query": {
                "bool": {
                  "must": [
                    {
                      "knn": {
                        "field": "embedding",
                        "query_vector": [ -0.045755304396152496, 0.05626086145639419, ... ],
                        "k": 10000,
                        "num_candidates": 10000,
                        "filter": []
                      }
                    }
                  ],
                  "must_not": [
                    {
                      "ids": { "values": ["123"] }
                    }
                  ]
                }
              },
              "project": [
                {
                  "field": {
                    "name": "_score",
                    "alias": "knn_score"
                  }
                }
              ]
            }
          }
        }
      ]
    }
  },
  "from": 0,
  "min_score": 0.7,
  "size": 2,
  "sort": [
    { "_score": { "order": "desc" } }
  ],
  "_source": { "includes": ["id"] },
  "track_total_hits": true
}

It’s a bit strange because num_candidates has a hard limit of 10k. You should get an exception with both queries.

You’re right, my mistake.
I didn’t use num_candidates in the version without the join (Optional).

The real issue is that inside a join I can’t run something like:

"knn": {
  "field": "embedding",
  "query_vector": [ -0.045755304396152496, 0.05626086145639419, ... ],
  "k": 50000,
  "num_candidates": 10000,  <------ here I cannot pass a smaller value
  "filter": []
}

In the join query, num_candidates isn’t allowed to be smaller than k, and the join layer forces me to provide num_candidates.
Outside the join it’s optional, but if I don’t include it inside the join, I get the exception I mentioned earlier.

Got it, thanks. Unfortunately this use case requires the fix that will ship in the next release; the quick fix available to you now is to provide num_candidates.

Thanks a lot! Looking forward to the next release