Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rocksdb::BlockBasedTableOptions::kDataBlockBinaryAndHash returns incorrect value with timestamp #782

Open
peroket opened this issue Nov 27, 2023 · 2 comments

Comments

@peroket
Copy link

peroket commented Nov 27, 2023

Describe the bug
If using timestamp and rocksdb::BlockBasedTableOptions::kDataBlockBinaryAndHash, the returned value in Get() is not the latest value.

To Reproduce
Test done with speedb 2.7.0 and Catch2.

// Taken from rocksdb
rocksdb::Slice StripTimestampFromUserKey(const rocksdb::Slice& user_key, const size_t ts_sz) noexcept {
    rocksdb::Slice ret = user_key;
    ret.remove_suffix(ts_sz);
    return ret;
}

// Taken from rocksdb
rocksdb::Slice ExtractTimestampFromUserKey(const rocksdb::Slice& user_key, const size_t ts_sz) noexcept {
    assert(user_key.size() >= ts_sz);
    return {user_key.data() + user_key.size() - ts_sz, ts_sz};
}

// Taken from rocksdb
void EncodeFixed32(char* buf, uint32_t value) noexcept {
    buf[0] = static_cast<char>(value & 0xff);
    buf[1] = static_cast<char>((value >> 8) & 0xff);
    buf[2] = static_cast<char>((value >> 16) & 0xff);
    buf[3] = static_cast<char>((value >> 24) & 0xff);
}

// Taken from rocksdb
uint32_t DecodeFixed32(const char* ptr) noexcept {
    return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0]))) |
            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8) |
            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16) |
            (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
}

// Taken from rocksdb https://github.com/facebook/rocksdb/blob/main/util/comparator.cc#L237
class ComparatorWithTimestamp : public rocksdb::Comparator {
  public:
    explicit ComparatorWithTimestamp() noexcept : Comparator(/*ts_sz=*/sizeof(uint32_t)) {
        assert(cmp_without_ts_->timestamp_size() == 0);
    }

    // The comparator that compares the user key without timestamp part is treated
    // as the root comparator.
    [[nodiscard]] const Comparator* GetRootComparator() const override { return cmp_without_ts_; }

    void FindShortSuccessor(std::string* /*key*/) const override {}
    void FindShortestSeparator(std::string* /*start*/, const rocksdb::Slice& /*limit*/) const override {}
    [[nodiscard]] int Compare(const rocksdb::Slice& a, const rocksdb::Slice& b) const override {
        const int ret = CompareWithoutTimestamp(a, b);
        const size_t ts_sz = timestamp_size();
        if (ret != 0) {
            return ret;
        }
        // Compare timestamp.
        // For the same user key with different timestamps, larger (newer) timestamp
        // comes first.
        return -CompareTimestamp(ExtractTimestampFromUserKey(a, ts_sz), ExtractTimestampFromUserKey(b, ts_sz));
    }
    using Comparator::CompareWithoutTimestamp;
    [[nodiscard]] int CompareWithoutTimestamp(const rocksdb::Slice& a, bool a_has_ts, const rocksdb::Slice& b,
                                              bool b_has_ts) const override {
        const size_t ts_sz = timestamp_size();
        assert(!a_has_ts || a.size() >= ts_sz);
        assert(!b_has_ts || b.size() >= ts_sz);
        const rocksdb::Slice lhs = a_has_ts ? StripTimestampFromUserKey(a, ts_sz) : a;
        const rocksdb::Slice rhs = b_has_ts ? StripTimestampFromUserKey(b, ts_sz) : b;
        return cmp_without_ts_->Compare(lhs, rhs);
    }
    [[nodiscard]] int CompareTimestamp(const rocksdb::Slice& ts1, const rocksdb::Slice& ts2) const override {
        assert(ts1.size() == sizeof(uint32_t));
        assert(ts2.size() == sizeof(uint32_t));
        const auto lhs = DecodeFixed32(ts1.data());
        const auto rhs = DecodeFixed32(ts2.data());
        if (lhs < rhs) {
            return -1;
        } else if (lhs > rhs) {
            return 1;
        } else {
            return 0;
        }
    }
    [[nodiscard]] bool CanKeysWithDifferentByteContentsBeEqual() const override { return false; }

    [[nodiscard]] const char* Name() const override { return "name"; }

  private:
    const rocksdb::Comparator* cmp_without_ts_ = rocksdb::BytewiseComparator();
};

TEST_CASE("Timestamp bug") {
    const auto* path = ...; // set your path

    rocksdb::DB* db_ptr{};
    rocksdb::Options options;
    ComparatorWithTimestamp cmp;
    options.create_if_missing = true;
    options.comparator = &cmp;

    rocksdb::BlockBasedTableOptions table_options;
    // Comment this line to make the test pass
    table_options.data_block_index_type = rocksdb::BlockBasedTableOptions::kDataBlockBinaryAndHash;
    options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));

    REQUIRE(rocksdb::DB::Open(options, path, &db_ptr).ok());
    const std::unique_ptr<rocksdb::DB> db(db_ptr);

    constexpr size_t count = 10'000;
    std::string timestamp(4, '\0');
    std::string key(32, '\0');

    for (uint32_t i = 0; i < count; ++i) {
        EncodeFixed32(timestamp.data(), i);
        db->Put(rocksdb::WriteOptions(), key, timestamp, std::to_string(i));
    }
    db->Flush(rocksdb::FlushOptions());

    std::string result;
    rocksdb::ReadOptions read_options;
    const rocksdb::Slice slice(timestamp);
    EncodeFixed32(timestamp.data(), count);
    read_options.timestamp = &slice;
    const auto status = db->Get(read_options, key, &result);
    REQUIRE(status.ok());
    REQUIRE(result == std::to_string(count - 1)); // This fails and return 9823 instead of 9999

}

Expected behavior
The result should be the same regardless of the index.

System (please complete the following information):

  • OS: [Ubuntu >= 22.04] (not tested below)
  • Hardware [Intel or AMD processor, enough ram and disk]
@peroket
Copy link
Author

peroket commented Nov 27, 2023

Note: tried with rocksdb 8.8.1 and was able to reproduce the issue

@peroket
Copy link
Author

peroket commented Nov 27, 2023

Opened an issue on rocksdb github as well: facebook/rocksdb#12100

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant