Redis源码分析 --hashtable

首先来看一下dict结构体，是redis的哈希表实现，这里重要的是渐进式rehash。rehash就是扩容操作，防止链变长严重影响哈希表的性能。

type：指向字典类型结构（dictType）的指针，该结构定义了字典的操作函数，如插入、查找、删除等。

ht_table[2]：散列表数组，包含两个散列表。Redis 使用两个散列表实现渐进式 rehash，其中一个是当前使用的散列表，另一个是正在进行 rehash 的散列表。

ht_used[2]：记录两个散列表当前已使用的节点数量，用于动态调整散列表的大小。

rehashidx：用于指示是否正在进行 rehash 过程。如果 rehashidx 的值为 -1，表示没有进行 rehash；否则，它表示正在进行 rehash 的索引。

pauserehash：如果大于 0，表示 rehash 过程被暂停了（小于 0 表示编码错误）。

ht_size_exp[2]：保存两个散列表的大小的指数部分，实际大小为 2 的 exp 次方。

这里采用的是拉链法，每个元素都是一个链表。（学习源码你先不要专注每一行，快速浏览文章知道整个过程，然后自己下载源码按照这个思路去读在读一些你感兴趣的部分就好，漫无目的的读很枯燥，源码量很大，但是redis源码很清晰）

c
struct dict {
    dictType *type;              // 指向字典类型结构的指针，定义了字典的各种操作函数

    dictEntry **ht_table[2];     // 散列表数组，ht_table[0] 和 ht_table[1] 分别表示两个散列表
    unsigned long ht_used[2];    // 两个散列表当前已使用的节点数量

    long rehashidx;              // 如果 rehashidx == -1，表示没有进行 rehash 过程，否则表示正在进行 rehash 的索引

    int16_t pauserehash;         // 如果大于 0，表示 rehash 过程被暂停了（小于 0 表示编码错误）
    signed char ht_size_exp[2];  // 保存两个散列表的大小的指数部分（实际大小为 2^exp）
};

dictType主要是用来标识不同的运用，比如zset,set,还有redis一些内部运用，主要是一些操作函数，很有面向对象多态那个味道。

c

typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

接下来是dictEntry，是桶结构，就是一个链表

c
struct dictEntry {
    void *key;                  // 键的指针

   union {                     //小技巧，当类型确定可以省一个指针
        void *val;              // 值的指针（robj）
        uint64_t u64;           // 64 位无符号整数
        int64_t s64;            // 64 位有符号整数
        double d;               // 双精度浮点数
    } v;

    struct dictEntry *next;     // 同一个哈希桶中的下一个节点
};

//

typedef struct {
    void *key;
    dictEntry *next;
} dictEntryNoValue;

接下来是哈希函数相关

dict_hash_function_seed：静态数组，用于存储哈希函数的种子值。

dictSetHashFunctionSeed：函数用于设置哈希函数种子值。通过 memcpy 将传入的 seed 复制到 dict_hash_function_seed。

dictGetHashFunctionSeed：函数返回哈希函数种子值的指针。

siphash 和 siphash_nocase：这两个函数是哈希函数的具体实现，它们使用 SipHash 算法对输入进行哈希计算。其中，siphash 对大小写敏感，而 siphash_nocase 对大小写不敏感。

dictGenHashFunction：函数接受键值和长度作为输入参数，并使用 siphash 函数对其进行哈希计算。它返回一个 64 位的哈希值。

dictGenCaseHashFunction：函数类似于 dictGenHashFunction，但它使用 siphash_nocase 函数对键值进行哈希计算，以实现大小写不敏感的哈希。

c
static uint8_t dict_hash_function_seed[16];

void dictSetHashFunctionSeed(uint8_t *seed) {
    memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}

uint8_t *dictGetHashFunctionSeed(void) {
    return dict_hash_function_seed;
}

/* The default hashing function uses SipHash implementation
 * in siphash.c. */

uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);

uint64_t dictGenHashFunction(const void *key, size_t len) {
    return siphash(key,len,dict_hash_function_seed);
}

uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
    return siphash_nocase(buf,len,dict_hash_function_seed);
}

siphash算法是一种为了应对哈希洪水攻击设计的优秀的哈希算法，感兴趣的了可以去了解一下，很多语言，比如rust,ruby默认的哈希算法就是siphash算法。

加下来就是rehash，redis的rehash是渐进式，什么是渐进式？一个dict中有两个哈希表，table[0]是实际业务使用的，table[1]是备用，当我们第一个表太大达到了需要rehash条件，就进行rehash，但是一次复制整个哈希表的数据，阻塞耗时，选择一个桶一个桶复制，每次只复制一个捅，这样会减少单次阻塞时间。当我们所有数据都复制到table[1],把table[0]指针指向table[1],原本的要free掉。扩容的大小是多少呢？现在很多讲解的版本是used[0] * 2 ,但是新版本已经变成+1，这点我能想到的解释是省内存。结合代码分析

c
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if (!dictTypeExpandAllowed(d))
        return DICT_OK;
    if ((dict_can_resize == DICT_RESIZE_ENABLE &&
         d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0])) ||
        (dict_can_resize != DICT_RESIZE_FORBID &&
         d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio))
    {
        return dictExpand(d, d->ht_used[0] + 1);
    }
    return DICT_OK;
}

dictIsRehashing(d)：检查字典是否正在进行渐进式 rehash 过程。如果是，则直接返回 DICT_OK，不进行扩展。

DICTHT_SIZE(d->ht_size_exp[0]) == 0：检查当前哈希表是否为空。如果是空的，则将其扩展到初始大小，并返回 DICT_OK。

dictTypeExpandAllowed(d)：检查是否允许对字典进行扩展。根据全局设置和字典类型的扩展限制进行判断。如果不允许扩展，则返回 DICT_OK。

(dict_can_resize == DICT_RESIZE_ENABLE && d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0]))：如果允许对字典进行扩展（全局设置为启用）并且当前使用的节点数达到了当前哈希表大小的 1:1 比例，就需要扩展。返回 dictExpand(d, d->ht_used[0] + 1)，其中 d->ht_used[0] + 1 表示新的哈希表大小。

(dict_can_resize != DICT_RESIZE_FORBID && d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio)：如果不允许扩展（全局设置为禁止）但元素/桶的比率超过了安全阈值(默认等于5,也就是超过哈希表默认大小的五倍)，即使不允许扩展，也需要扩展。返回 dictExpand(d, d->ht_used[0] + 1)。

如果以上条件都不满足，则返回 DICT_OK。

c
/* Our hash table capability is a power of two */
static signed char _dictNextExp(unsigned long size) {
    unsigned char e = DICT_HT_INITIAL_EXP;

    if (size >= LONG_MAX) return (8*sizeof(long)-1);
    while(1) {
        if (((unsigned long)1<<e) >= size)
            return e;
        e++;
    }
}



/* 扩展或创建哈希表，
 * 当malloc_failed非空时，如果malloc失败（此时它将被设置为1），将避免出现错误。
 * 如果执行了扩展，返回DICT_OK；如果跳过了扩展，返回DICT_ERR。 */
int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
{
    if (malloc_failed) *malloc_failed = 0;

    /* 如果size小于哈希表中已有的元素数量，则视为无效的大小 */
    if (dictIsRehashing(d) || d->ht_used[0] > size)
        return DICT_ERR;

    /* 新的哈希表 */
    dictEntry **new_ht_table;
    unsigned long new_ht_used;
    signed char new_ht_size_exp = _dictNextExp(size);

    /* 检测溢出 */
    size_t newsize = 1ul<<new_ht_size_exp;
    if (newsize < size || newsize * sizeof(dictEntry*) < newsize)
        return DICT_ERR;

    /* 如果malloc_failed非空，尝试分配新的哈希表并将malloc_failed设置为1，
     * 如果分配失败，则返回DICT_ERR。 */
    if (malloc_failed) {
        new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*));
        *malloc_failed = new_ht_table == NULL;
        if (*malloc_failed)
            return DICT_ERR;
    } else
        new_ht_table = zcalloc(newsize*sizeof(dictEntry*));

    new_ht_used = 0;

    /* 如果是第一次初始化哈希表，则不是真正的rehash操作，只是设置第一个哈希表以接受键。 */
    if (d->ht_table[0] == NULL) {
        if (d->type->rehashingStarted) d->type->rehashingStarted(d);
        if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
        d->ht_size_exp[0] = new_ht_size_exp;
        d->ht_used[0] = new_ht_used;
        d->ht_table[0] = new_ht_table;
        return DICT_OK;
    }

    /* 准备第二个哈希表用于增量rehash操作 */
    d->ht_size_exp[1] = new_ht_size_exp;
    d->ht_used[1] = new_ht_used;
    d->ht_table[1] = new_ht_table;
    d->rehashidx = 0;
    if (d->type->rehashingStarted) d->type->rehashingStarted(d);
    return DICT_OK;
}

/* return DICT_ERR if expand was not performed */
int dictExpand(dict *d, unsigned long size) {
    return _dictExpand(d, size, NULL);
}

/* return DICT_ERR if expand failed due to memory allocation failure */
int dictTryExpand(dict *d, unsigned long size) {
    int malloc_failed;
    _dictExpand(d, size, &malloc_failed);
    return malloc_failed? DICT_ERR : DICT_OK;
}

这些代码主要做的就是决定要不要启动rehash,还有设置新的大小。真正做rehash的函数是下面几个这里的优化也很多，empty_visits表示最多可检查的桶，因为检查也是耗时，不能耽误性能。n始终为1，每次rehash只会复制一个桶。

c
static void _dictRehashStep(dict *d) {
    if (d->pauserehash == 0) dictRehash(d,1);
}



int dictRehash(dict *d, int n) {
    int empty_visits = n * 10; /* 最多访问的空桶数 */
    unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]); // 旧哈希表的大小
    unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]); // 新哈希表的大小

    // 如果不允许调整大小或者没有进行 rehash，则直接返回 0
    if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d))
        return 0;
    
    // 如果使用 DICT_RESIZE_AVOID，并且新哈希表的大小与旧哈希表的大小相差不大，则直接返回 0
    if (dict_can_resize == DICT_RESIZE_AVOID &&
        ((s1 > s0 && s1 / s0 < dict_force_resize_ratio) ||
         (s1 < s0 && s0 / s1 < dict_force_resize_ratio))) {
        return 0;
    }

    // 对旧哈希表进行 rehash
    while (n-- && d->ht_used[0] != 0) {
        dictEntry *de, *nextde;

        // 注意，rehashidx 不会溢出，因为我们确认有更多元素，因为 ht[0].used != 0
        assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);

        // 寻找下一个非空桶
        while (d->ht_table[0][d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0)
                return 1;
        }

        de = d->ht_table[0][d->rehashidx];

        // 将这个桶中的所有键从旧哈希表移动到新哈希表
        while (de) {
            uint64_t h;
            nextde = dictGetNext(de);
            void *key = dictGetKey(de);

            // 在新哈希表中获取索引
            if (d->ht_size_exp[1] > d->ht_size_exp[0]) {
                h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
            } else {
                // 我们正在缩小哈希表。表的大小是 2 的幂次方，所以我们可以通过在较大表中掩码桶索引来获取较小表中的桶索引。
                h = d->rehashidx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
            }

            // 根据字典类型进行不同的处理
            if (d->type->no_value) {
                if (d->type->keys_are_odd && !d->ht_table[1][h]) {
                    /* 目标桶为空，可以直接存储键，无需分配条目。如果旧条目是已分配的条目，则释放它。
                     *
                     * TODO: 添加一个 'keys_are_even' 标志，如果设置了，也可以对这些字典使用此优化。
                     * 存储时我们可以将 LSB 位设置为存储为字典条目的键，并在需要键时再将其清除。 */
                    assert(entryIsKey(key));
                    if (!entryIsKey(de))
                        zfree(decodeMaskedPtr(de));
                    de = key;
                } else if (entryIsKey(de)) {
                    /* 没有分配的条目，但我们需要一个。 */
                    de = createEntryNoValue(key, d->ht_table[1][h]);
                } else {
                    /* 只需将现有条目移动到目标表，并更新 'next' 字段。 */
                    assert(entryIsNoValue(de));
                    dictSetNext(de, d->ht_table[1][h]);
                }
            } else {
                dictSetNext(de, d->ht_table[1][h]);
            }

            d->ht_table[1][h] = de;
            d->ht_used[0]--;
            d->ht_used[1]++;
            de = nextde;
        }

        d->ht_table[0][d->rehashidx] = NULL;
        d->rehashidx++;
    }

    // 检查是否已经rehash 了整个哈希表...

    if (d->ht_used[0] == 0) {
        if (d->type->rehashingCompleted)
            d->type->rehashingCompleted(d);
        zfree(d->ht_table[0]);

        // 将新哈希表复制到旧哈希表
        d->ht_table[0] = d->ht_table[1];
        d->ht_used[0] = d->ht_used[1];
        d->ht_size_exp[0] = d->ht_size_exp[1];
        _dictReset(d, 1);
        d->rehashidx = -1;
        return 0;
    }

    // 还有更多需要 rehash 的元素...
    return 1;
}

调用rehash的函数有哪些

Find

c
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    uint64_t h, idx, table;

    if (dictSize(d) == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d); //
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        if (table == 0 && (long)idx < d->rehashidx) continue; 
        he = d->ht_table[table][idx];
        while(he) {
            void *he_key = dictGetKey(he);
            if (key == he_key || dictCompareKeys(d, key, he_key))
                return he;
            he = dictGetNext(he);
        }
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}

AddRow

c
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    /* Get the position for the new key or NULL if the key already exists. */
    void *position = dictFindPositionForInsert(d, key, existing);
    if (!position) return NULL;

    /* Dup the key if necessary. */
    if (d->type->keyDup) key = d->type->keyDup(d, key);

    return dictInsertAtPosition(d, key, position);
}

/* Finds and returns the position within the dict where the provided key should
 * be inserted using dictInsertAtPosition if the key does not already exist in
 * the dict. If the key exists in the dict, NULL is returned and the optional
 * 'existing' entry pointer is populated, if provided. */
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) {
    unsigned long idx, table;
    dictEntry *he;
    uint64_t hash = dictHashKey(d, key);  // 计算给定键的哈希值
    if (existing) *existing = NULL;  // 如果提供了existing参数，则将其初始化为NULL
    if (dictIsRehashing(d)) _dictRehashStep(d);  // 如果正在进行rehash，则执行一次rehash步骤

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return NULL;  // 如果需要，扩展哈希表的大小

    // 在两个哈希表中查找位置
    for (table = 0; table <= 1; table++) {
        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);  // 根据哈希表大小计算索引
        if (table == 0 && (long)idx < d->rehashidx) continue;  // 如果正在进行rehash，并且索引小于rehash的位置，则继续下一个循环

        /* Search if this slot does not already contain the given key */
        he = d->ht_table[table][idx];  // 获取当前哈希桶的第一个条目指针
        while(he) {
            void *he_key = dictGetKey(he);  // 获取条目的键
            if (key == he_key || dictCompareKeys(d, key, he_key)) {
                if (existing) *existing = he;  // 如果键已经存在，则将existing设置为该条目
                return NULL;  // 返回NULL表示键已经存在于字典中
            }
            he = dictGetNext(he);  // 获取下一个条目
        }

        if (!dictIsRehashing(d)) break;  // 如果不在进行rehash，则跳出循环
    }

    /* If we are in the process of rehashing the hash table, the bucket is
     * always returned in the context of the second (new) hash table. */
    dictEntry **bucket = &d->ht_table[dictIsRehashing(d) ? 1 : 0][idx];  // 获取哈希桶的指针
    return bucket;  // 返回哈希桶的指针，表示应该在该位置插入新键
}

Delete

c
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    uint64_t h, idx;         // 哈希值和哈希桶索引
    dictEntry *he, *prevHe;  // 当前条目指针和前一个条目指针
    int table;               // 哈希表索引

    /* dict is empty */
    if (dictSize(d) == 0) return NULL;  // 字典为空，直接返回NULL

    if (dictIsRehashing(d)) _dictRehashStep(d);  // 如果正在进行rehash，则执行一次rehash步骤
    h = dictHashKey(d, key);  // 计算键的哈希值

    for (table = 0; table <= 1; table++) {
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);  // 计算哈希桶索引
        if (table == 0 && (long)idx < d->rehashidx) continue;  // 在rehash过程中，跳过第一个哈希表中rehash索引之前的桶
        he = d->ht_table[table][idx];  // 获取对应哈希桶的第一个条目指针
        prevHe = NULL;  // 初始化前一个条目指针为NULL
        while(he) {
            void *he_key = dictGetKey(he);  // 获取当前条目的键
            if (key == he_key || dictCompareKeys(d, key, he_key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    dictSetNext(prevHe, dictGetNext(he));  // 将前一个条目指针的next指针指向当前条目的next指针
                else
                    d->ht_table[table][idx] = dictGetNext(he);  // 如果前一个条目为空，则更新哈希桶的第一个条目指针
                if (!nofree) {
                    dictFreeUnlinkedEntry(d, he);  // 如果不禁止释放，释放当前条目的内存
                }
                d->ht_used[table]--;  // 更新哈希表中的条目数量
                return he;  // 返回被删除的条目指针
            }
            prevHe = he;  // 更新前一个条目指针为当前条目指针
            he = dictGetNext(he);  // 获取下一个条目指针
        }
        if (!dictIsRehashing(d)) break;  // 如果不在进行rehash，则退出循环
    }
    return NULL; /* not found */
}

/* Remove an element, returning DICT_OK on success or DICT_ERR if the
 * element was not found. */
int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}

RadomKey

c
/* Return a random entry from the hash table. Useful to
 * implement randomized algorithms */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *he, *orighe;     // 哈希表条目指针，用于遍历和返回结果
    unsigned long h;            // 哈希值
    int listlen, listele;       // 链表长度和元素索引

    if (dictSize(d) == 0) return NULL;  // 哈希表为空，直接返回NULL
    if (dictIsRehashing(d)) _dictRehashStep(d);  // 如果正在进行rehash，则执行一次rehash步骤
    if (dictIsRehashing(d)) {
        unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
        do {
            /* We are sure there are no elements in indexes from 0
             * to rehashidx-1 */
            h = d->rehashidx + (randomULong() % (dictBuckets(d) - d->rehashidx));
            he = (h >= s0) ? d->ht_table[1][h - s0] : d->ht_table[0][h];
        } while(he == NULL);
    } else {
        unsigned long m = DICTHT_SIZE_MASK(d->ht_size_exp[0]);
        do {
            h = randomULong() & m;  // 生成随机的哈希值
            he = d->ht_table[0][h];  // 获取对应哈希值的条目指针
        } while(he == NULL);
    }

    /* Now we found a non empty bucket, but it is a linked
     * list and we need to get a random element from the list.
     * The only sane way to do so is counting the elements and
     * select a random index. */
    listlen = 0;
    orighe = he;  // 保存初始的哈希表条目指针
    while(he) {   // 计算链表的长度
        he = dictGetNext(he);
        listlen++;
    }
    listele = random() % listlen;  // 随机生成链表中的元素索引
    he = orighe;  // 重新将哈希表条目指针指向初始位置
    while(listele--) he = dictGetNext(he);  // 根据索引找到对应的条目指针
    return he;  // 返回随机获取的哈希表条目指针
}

GetSomeKey

c
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
    unsigned long j; /* internal hash table id, 0 or 1. */
    unsigned long tables; /* 1 or 2 tables? */
    unsigned long stored = 0, maxsizemask;
    unsigned long maxsteps;

    if (dictSize(d) < count) count = dictSize(d);  // 如果字典大小小于count，则将count设置为字典的大小
    maxsteps = count*10;  // 最大迭代步数为count的10倍

    /* Try to do a rehashing work proportional to 'count'. */
    for (j = 0; j < count; j++) {
        if (dictIsRehashing(d))
            _dictRehashStep(d);  // 如果正在进行rehash，则执行一次rehash步骤
        else
            break;
    }

    tables = dictIsRehashing(d) ? 2 : 1;  // 确定字典中的哈希表数量
    maxsizemask = DICTHT_SIZE_MASK(d->ht_size_exp[0]);  // 获取第一个哈希表的大小掩码
    if (tables > 1 && maxsizemask < DICTHT_SIZE_MASK(d->ht_size_exp[1]))
        maxsizemask = DICTHT_SIZE_MASK(d->ht_size_exp[1]);  // 如果正在进行rehash并且第二个哈希表的大小掩码更大，则更新maxsizemask

    /* Pick a random point inside the larger table. */
    unsigned long i = randomULong() & maxsizemask;  // 在较大的哈希表中选择一个随机位置
    unsigned long emptylen = 0; /* Continuous empty entries so far. */
    while(stored < count && maxsteps--) {
        for (j = 0; j < tables; j++) {
            /* Invariant of the dict.c rehashing: up to the indexes already
             * visited in ht[0] during the rehashing, there are no populated
             * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
            if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
                /* Moreover, if we are currently out of range in the second
                 * table, there will be no elements in both tables up to
                 * the current rehashing index, so we jump if possible.
                 * (this happens when going from big to small table). */
                if (i >= DICTHT_SIZE(d->ht_size_exp[1]))
                    i = d->rehashidx;  // 如果当前位置超出第二个哈希表的范围，则跳到rehash的位置
                else
                    continue;
            }
            if (i >= DICTHT_SIZE(d->ht_size_exp[j])) continue; /* Out of range for this table. */
            dictEntry *he = d->ht_table[j][i];  // 获取当前位置的条目

            /* Count contiguous empty buckets, and jump to other
             * locations if they reach 'count' (with a minimum of 5). */
            if (he == NULL) {
                emptylen++;
                if (emptylen >= 5 && emptylen > count) {
                    i = randomULong() & maxsizemask;  // 如果连续空桶达到一定数量，则随机选择下一个位置
                    emptylen = 0;
                }
            } else {
                emptylen = 0;
                while (he) {
                    /* Collect all the elements of the buckets found non empty while iterating.
                     * To avoid the issue of being unable to sample the end of a long chain,
                     * we utilize the Reservoir Sampling algorithm to optimize the sampling process.
                     * This means that even when the maximum number of samples has been reached,
                     * we continue sampling until we reach the end of the chain.
                     * See https://en.wikipedia.org/wiki/Reservoir_sampling. */
                    if (stored < count) {
                        des[stored] = he;  // 将非空桶中的条目存储在des数组中
                    } else {
                        unsigned long r = randomULong() % (stored + 1);
                        if (r < count) des[r] = he;  // 当已经存储了count个条目时，使用蓄水池抽样算法随机选择一个位置存储当前条目
                    }

                    he = dictGetNext(he);  // 获取下一个条目
                    stored++;  // 已存储的条目数加1
                }
                if (stored >= count) goto end;  // 如果已存储的条目数达到count，则跳到函数末尾
            }
        }
        i = (i+1) & maxsizemask;  // 移动到下一个位置
    }

end:
    return stored > count ? count : stored;  // 返回实际存储的条目数，不超过count
}

到这里就是整个rehash的过程

目录

Find

AddRow

Delete

RadomKey

GetSomeKey