diff --git a/src/core/dash.h b/src/core/dash.h index 0c67fd6b2..05a99560f 100644 --- a/src/core/dash.h +++ b/src/core/dash.h @@ -149,6 +149,9 @@ class DashTable : public detail::DashTableBase { template const_iterator Find(U&& key) const; template iterator Find(U&& key); + // Prefetches the memory where the key would reside into the cache. + template void Prefetch(U&& key) const; + // Find first entry with given key hash that evaulates to true on pred. // Pred accepts either (const key&) or (const key&, const value&) template iterator FindFirst(uint64_t key_hash, Pred&& pred); @@ -699,6 +702,14 @@ auto DashTable<_Key, _Value, Policy>::Find(U&& key) -> iterator { return FindFirst(DoHash(key), EqPred(key)); } +template +template +void DashTable<_Key, _Value, Policy>::Prefetch(U&& key) const { + uint64_t key_hash = DoHash(key); + uint32_t seg_id = SegmentId(key_hash); + segment_[seg_id]->Prefetch(key_hash); +} + template template auto DashTable<_Key, _Value, Policy>::FindFirst(uint64_t key_hash, Pred&& pred) -> iterator { diff --git a/src/core/dash_internal.h b/src/core/dash_internal.h index 25981140d..0293d755d 100644 --- a/src/core/dash_internal.h +++ b/src/core/dash_internal.h @@ -502,6 +502,7 @@ template Iterator FindIt(Hash_t key_hash, Pred&& pred) const; + void Prefetch(Hash_t key_hash) const; // Returns valid iterator if succeeded or invalid if not (it's full). // Requires: key should be not present in the segment. @@ -1188,6 +1189,18 @@ auto Segment::FindIt(Hash_t key_hash, Pred&& pred) const -> return Iterator{}; } +template +void Segment::Prefetch(Hash_t key_hash) const { + uint8_t bidx = BucketIndex(key_hash); + const Bucket& target = bucket_[bidx]; + uint8_t nid = NextBid(bidx); + const Bucket& probe = bucket_[nid]; + + // Prefetch buckets that might contain the key with high probability.
+ __builtin_prefetch(&target, 0, 1); + __builtin_prefetch(&probe, 0, 1); +} + template template void Segment::TraverseAll(Cb&& cb) const { diff --git a/src/server/transaction.cc b/src/server/transaction.cc index 67f910438..813aefd6d 100644 --- a/src/server/transaction.cc +++ b/src/server/transaction.cc @@ -1171,6 +1171,8 @@ void Transaction::ScheduleBatchInShard() { ShardId sid = shard->shard_id(); auto& sq = schedule_queues[sid]; + array batch; + for (unsigned j = 0;; ++j) { // We pull the items from the queue in a loop until we reach the stop condition. // TODO: we may have fairness problem here, where transactions being added up all the time @@ -1178,16 +1180,39 @@ void Transaction::ScheduleBatchInShard() { // because we must ensure that there is another ScheduleBatchInShard callback in the queue. // Can be checked with testing sq.armed is true when j == 1. while (true) { - ScheduleContext* item = sq.queue.Pop(); - if (!item) + unsigned len = 0; + for (; len < batch.size(); ++len) { + ScheduleContext* item = sq.queue.Pop(); + if (!item) + break; + batch[len] = item; + if (!item->trans->IsGlobal()) { + auto shard_args = item->trans->GetShardArgs(sid); + // Can be empty if the transaction is not touching any keys and is + // NO_KEY_TRANSACTIONAL. + if (!shard_args.Empty()) { + auto& db_slice = item->trans->GetDbSlice(shard->shard_id()); + + // We could prefetch all the keys but this is enough to test the optimization for + // single key operations. 
+ db_slice.GetDBTable(item->trans->GetDbIndex())->prime.Prefetch(shard_args.Front()); + } + } + } + + if (len == 0) break; - if (!item->trans->ScheduleInShard(shard, item->optimistic_execution)) { - item->fail_cnt.fetch_add(1, memory_order_relaxed); + stats.tx_batch_scheduled_items_total += len; + + for (unsigned i = 0; i < len; ++i) { + ScheduleContext* item = batch[i]; + if (!item->trans->ScheduleInShard(shard, item->optimistic_execution)) { + item->fail_cnt.fetch_add(1, memory_order_relaxed); + } + item->trans->FinishHop(); } - item->trans->FinishHop(); - stats.tx_batch_scheduled_items_total++; - }; + } // j==1 means we already signalled that we're done with the current batch. if (j == 1)