* [PATCH 1/5] Osd: add three fields to pg_pool_t
2015-05-21 13:34 [PATCH] Osd: temperature based object eviction for cache tiering Li Wang
@ 2015-05-21 13:34 ` Li Wang
2015-05-21 13:34 ` [PATCH 2/5] Mon: expose commands for temperature related setting Li Wang
` (3 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Li Wang @ 2015-05-21 13:34 UTC (permalink / raw)
To: Sage Weil; +Cc: ceph-devel, MingXin Liu
From: MingXin Liu <mingxinliu@ubuntukylin.com>
Signed-off-by: MingXin Liu <mingxinliu@ubuntukylin.com>
Reviewed-by: Li Wang <liwang@ubuntukylin.com>
---
src/osd/osd_types.cc | 32 ++++++++++++++++++++++++++++++--
src/osd/osd_types.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 79 insertions(+), 2 deletions(-)
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index a73b46f..ba81889 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -871,7 +871,6 @@ void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
o.back()->name = "foo";
}
-
// -- pg_pool_t --
void pg_pool_t::dump(Formatter *f) const
@@ -910,6 +909,7 @@ void pg_pool_t::dump(Formatter *f) const
f->dump_int("read_tier", read_tier);
f->dump_int("write_tier", write_tier);
f->dump_string("cache_mode", get_cache_mode_name());
+ f->dump_string("cache_measure", get_cache_measure_name());
f->dump_unsigned("target_max_bytes", target_max_bytes);
f->dump_unsigned("target_max_objects", target_max_objects);
f->dump_unsigned("cache_target_dirty_ratio_micro",
@@ -925,6 +925,11 @@ void pg_pool_t::dump(Formatter *f) const
f->dump_unsigned("hit_set_period", hit_set_period);
f->dump_unsigned("hit_set_count", hit_set_count);
f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
+ f->dump_unsigned("hit_set_grade_decay_rate",hit_set_grade_decay_rate);
+ f->open_array_section("grade_table");
+ for (vector<uint32_t>::const_iterator p = grade_table.begin(); p != grade_table.end(); ++p)
+ f->dump_unsigned("grade", *p);
+ f->close_section();
f->dump_unsigned("stripe_width", get_stripe_width());
f->dump_unsigned("expected_num_objects", expected_num_objects);
}
@@ -1226,7 +1231,7 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
return;
}
- ENCODE_START(17, 5, bl);
+ ENCODE_START(18, 5, bl);
::encode(type, bl);
::encode(size, bl);
::encode(crush_ruleset, bl);
@@ -1268,6 +1273,9 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
::encode(last_force_op_resend, bl);
::encode(min_read_recency_for_promote, bl);
::encode(expected_num_objects, bl);
+ __u8 m = cache_measure;
+ ::encode(m, bl);
+ ::encode(hit_set_grade_decay_rate, bl);
ENCODE_FINISH(bl);
}
@@ -1385,6 +1393,16 @@ void pg_pool_t::decode(bufferlist::iterator& bl)
} else {
expected_num_objects = 0;
}
+ if (struct_v >= 18) {
+ __u8 v;
+ ::decode(v, bl);
+ cache_measure = (cache_measure_t)v;
+ ::decode(hit_set_grade_decay_rate, bl);
+ set_grade(hit_set_grade_decay_rate, hit_set_count);
+ } else {
+ cache_measure = CACHEMEASURE_ATIME;
+ hit_set_grade_decay_rate = 0;
+ }
DECODE_FINISH(bl);
calc_pg_masks();
}
@@ -1425,12 +1443,16 @@ void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
a.tiers.insert(1);
a.tier_of = 2;
a.cache_mode = CACHEMODE_WRITEBACK;
+ a.cache_measure = CACHEMEASURE_ATIME;
a.read_tier = 1;
a.write_tier = 1;
a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
a.hit_set_period = 3600;
a.hit_set_count = 8;
a.min_read_recency_for_promote = 1;
+ a.hit_set_grade_decay_rate = 50;
+ a.grade_table.push_back(1000000);
+ a.grade_table.push_back(500000);
a.set_stripe_width(12345);
a.target_max_bytes = 1238132132;
a.target_max_objects = 1232132;
@@ -1475,6 +1497,8 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
out << " write_tier " << p.write_tier;
if (p.cache_mode)
out << " cache_mode " << p.get_cache_mode_name();
+ if (p.cache_mode)
+ out << " cache_measure " << p.get_cache_measure_name();
if (p.target_max_bytes)
out << " target_bytes " << p.target_max_bytes;
if (p.target_max_objects)
@@ -1483,6 +1507,10 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
out << " hit_set " << p.hit_set_params
<< " " << p.hit_set_period << "s"
<< " x" << p.hit_set_count;
+ if (p.cache_measure == pg_pool_t::CACHEMEASURE_TEMP) {
+ out << " decay_rate " << p.hit_set_grade_decay_rate
+ << " grade_table" << p.grade_table;
+ }
}
if (p.min_read_recency_for_promote)
out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 18f5402..7bea017 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -888,6 +888,12 @@ struct pg_pool_t {
CACHEMODE_READFORWARD = 4, ///< forward reads, write to cache flush later
CACHEMODE_READPROXY = 5 ///< proxy reads, write to cache flush later
} cache_mode_t;
+
+ /// Criterion a cache tier uses to judge which objects are hot.
+ typedef enum {
+   CACHEMEASURE_INVALID = -1, ///< unrecognized measure; gives the enum a negative
+                              ///< enumerator so (cache_measure_t)-1 is in range
+                              ///< and a "< 0" validity check can succeed
+   CACHEMEASURE_ATIME = 0, ///< judge hot by atime
+   CACHEMEASURE_TEMP = 1 ///< judge hot by temperature
+ } cache_measure_t;
+
static const char *get_cache_mode_name(cache_mode_t m) {
switch (m) {
case CACHEMODE_NONE: return "none";
@@ -932,6 +938,24 @@ struct pg_pool_t {
}
}
+ /// Map cache measure @p m to its textual name; any value other than the two
+ /// known measures yields "unknown".
+ static const char *get_cache_measure_name(cache_measure_t m) {
+   if (m == CACHEMEASURE_ATIME)
+     return "atime";
+   if (m == CACHEMEASURE_TEMP)
+     return "temperature";
+   return "unknown";
+ }
+ /// Parse a cache measure name. "atime" and "temperature" map to their
+ /// enumerators; any other string yields the out-of-band value
+ /// (cache_measure_t)-1.
+ static cache_measure_t get_cache_measure_from_str(const string& s) {
+   cache_measure_t parsed = (cache_measure_t)-1;
+   if (s == "atime") {
+     parsed = CACHEMEASURE_ATIME;
+   } else if (s == "temperature") {
+     parsed = CACHEMEASURE_TEMP;
+   }
+   return parsed;
+ }
+ /// Textual name of the cache measure currently stored in this pool.
+ const char *get_cache_measure_name() const {
+   const cache_measure_t current = cache_measure;
+   return get_cache_measure_name(current);
+ }
+
uint64_t flags; ///< FLAG_*
__u8 type; ///< TYPE_*
__u8 size, min_size; ///< number of osds in each pg
@@ -976,6 +1000,7 @@ public:
int64_t read_tier; ///< pool/tier for objecter to direct reads to
int64_t write_tier; ///< pool/tier for objecter to direct writes to
cache_mode_t cache_mode; ///< cache pool mode
+ cache_measure_t cache_measure; ///< cache measure dimension: atime or temperature
bool is_tier() const { return tier_of >= 0; }
bool has_tiers() const { return !tiers.empty(); }
@@ -993,6 +1018,7 @@ public:
if (cache_mode != CACHEMODE_NONE)
flags |= FLAG_INCOMPLETE_CLONES;
cache_mode = CACHEMODE_NONE;
+ cache_measure = CACHEMEASURE_ATIME;
target_max_bytes = 0;
target_max_objects = 0;
@@ -1001,6 +1027,8 @@ public:
hit_set_params = HitSet::Params();
hit_set_period = 0;
hit_set_count = 0;
+ hit_set_grade_decay_rate = 0;
+ grade_table.resize(0);
}
uint64_t target_max_bytes; ///< tiering: target max pool size
@@ -1016,6 +1044,25 @@ public:
uint32_t hit_set_period; ///< periodicity of HitSet segments (seconds)
uint32_t hit_set_count; ///< number of periods to retain
uint32_t min_read_recency_for_promote; ///< minimum number of HitSet to check before promote
+ uint32_t hit_set_grade_decay_rate; ///< the current hit_set has the highest priority in an object's
+ ///< temperature count; each following hit_set's priority decays
+ ///< by this percentage relative to the previous hit_set
+ vector<uint32_t> grade_table;
+ /// Rebuild grade_table with @p size entries. Entry 0 is 1000000 and every
+ /// later entry is the previous one multiplied by (1 - decay/100), truncated
+ /// to an integer.
+ /// @param decay per-step decay in percent; 100 or more zeroes every entry
+ ///              after the first
+ /// @param size  number of entries to generate
+ void set_grade(uint32_t decay, unsigned size)
+ {
+   unsigned v = 1000000;
+   grade_table.resize(size);
+   for (unsigned i = 0; i < size; i++) {
+     grade_table[i] = v;
+     // decode() passes decay through unchecked; above 100 the factor would be
+     // negative, and converting a negative double to unsigned is undefined.
+     v = (decay >= 100) ? 0 : v * (1 - (decay / 100.0));
+   }
+ }
+ /// Return grade_table entry @p i, or 0 when @p i is past the end of the table.
+ /// Declared const so it can be called through a const pg_pool_t.
+ uint32_t get_grade(unsigned i) const
+ {
+   return (i < grade_table.size()) ? grade_table[i] : 0;
+ }
uint32_t stripe_width; ///< erasure coded stripe size in bytes
@@ -1035,6 +1082,7 @@ public:
pg_num_mask(0), pgp_num_mask(0),
tier_of(-1), read_tier(-1), write_tier(-1),
cache_mode(CACHEMODE_NONE),
+ cache_measure(CACHEMEASURE_ATIME),
target_max_bytes(0), target_max_objects(0),
cache_target_dirty_ratio_micro(0),
cache_target_full_ratio_micro(0),
@@ -1044,6 +1092,7 @@ public:
hit_set_period(0),
hit_set_count(0),
min_read_recency_for_promote(0),
+ hit_set_grade_decay_rate(0),
stripe_width(0),
expected_num_objects(0)
{ }
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 2/5] Mon: expose commands for temperature related setting
2015-05-21 13:34 [PATCH] Osd: temperature based object eviction for cache tiering Li Wang
2015-05-21 13:34 ` [PATCH 1/5] Osd: add three fields to pg_pool_t Li Wang
@ 2015-05-21 13:34 ` Li Wang
2015-05-21 14:29 ` Joao Eduardo Luis
2015-05-21 13:34 ` [PATCH 3/5] Osd: add a temperature based object eviction policy for cache tiering Li Wang
` (2 subsequent siblings)
4 siblings, 1 reply; 8+ messages in thread
From: Li Wang @ 2015-05-21 13:34 UTC (permalink / raw)
To: Sage Weil; +Cc: ceph-devel, MingXin Liu
From: MingXin Liu <mingxinliu@ubuntukylin.com>
Signed-off-by: MingXin Liu <mingxinliu@ubuntukylin.com>
Reviewed-by: Li Wang <liwang@ubuntukylin.com>
---
src/mon/MonCommands.h | 8 +++--
src/mon/OSDMonitor.cc | 87 ++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 88 insertions(+), 7 deletions(-)
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 8a36807..b26834d 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -639,11 +639,11 @@ COMMAND("osd pool rename " \
"rename <srcpool> to <destpool>", "osd", "rw", "cli,rest")
COMMAND("osd pool get " \
"name=pool,type=CephPoolname " \
- "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|all", \
+ "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|hit_set_grade_decay_rate|all", \
"get pool parameter <var>", "osd", "r", "cli,rest")
COMMAND("osd pool set " \
"name=pool,type=CephPoolname " \
- "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed " \
+ "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed|hit_set_grade_decay_rate " \
"name=val,type=CephString " \
"name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
"set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
@@ -695,6 +695,10 @@ COMMAND("osd tier cache-mode " \
"name=pool,type=CephPoolname " \
"name=mode,type=CephChoices,strings=none|writeback|forward|readonly|readforward|readproxy", \
"specify the caching mode for cache tier <pool>", "osd", "rw", "cli,rest")
+COMMAND("osd tier cache-measure " \
+ "name=pool,type=CephPoolname " \
+ "name=measure,type=CephChoices,strings=atime|temperature", \
+ "specify the caching measure to judge hot objects for cache tier <pool>", "osd", "rw", "cli,rest")
COMMAND("osd tier set-overlay " \
"name=pool,type=CephPoolname " \
"name=overlaypool,type=CephPoolname", \
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 10597d0..0374778 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2803,7 +2803,7 @@ namespace {
CACHE_TARGET_DIRTY_RATIO, CACHE_TARGET_FULL_RATIO,
CACHE_MIN_FLUSH_AGE, CACHE_MIN_EVICT_AGE,
ERASURE_CODE_PROFILE, MIN_READ_RECENCY_FOR_PROMOTE,
- WRITE_FADVISE_DONTNEED};
+ WRITE_FADVISE_DONTNEED, HIT_SET_GRADE_DECAY_RATE};
std::set<osd_pool_get_choices>
subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
@@ -3251,7 +3251,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
("cache_min_evict_age", CACHE_MIN_EVICT_AGE)
("erasure_code_profile", ERASURE_CODE_PROFILE)
("min_read_recency_for_promote", MIN_READ_RECENCY_FOR_PROMOTE)
- ("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED);
+ ("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED)
+ ("hit_set_grade_decay_rate", HIT_SET_GRADE_DECAY_RATE);
typedef std::set<osd_pool_get_choices> choices_set_t;
@@ -3259,7 +3260,7 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
(HIT_SET_TYPE)(HIT_SET_PERIOD)(HIT_SET_COUNT)(HIT_SET_FPP)
(TARGET_MAX_OBJECTS)(TARGET_MAX_BYTES)(CACHE_TARGET_FULL_RATIO)
(CACHE_TARGET_DIRTY_RATIO)(CACHE_MIN_FLUSH_AGE)(CACHE_MIN_EVICT_AGE)
- (MIN_READ_RECENCY_FOR_PROMOTE);
+ (MIN_READ_RECENCY_FOR_PROMOTE)(HIT_SET_GRADE_DECAY_RATE);
const choices_set_t ONLY_ERASURE_CHOICES = boost::assign::list_of
(ERASURE_CODE_PROFILE);
@@ -3389,6 +3390,10 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
f->dump_int("min_read_recency_for_promote",
p->min_read_recency_for_promote);
break;
+ case HIT_SET_GRADE_DECAY_RATE:
+   // Emit under the same name the variable is selected by in 'osd pool get'.
+   f->dump_int("hit_set_grade_decay_rate",
+               p->hit_set_grade_decay_rate);
+   break;
case WRITE_FADVISE_DONTNEED:
f->dump_string("write_fadvise_dontneed",
p->has_flag(pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED) ?
@@ -3476,6 +3481,10 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
ss << "min_read_recency_for_promote: " <<
p->min_read_recency_for_promote << "\n";
break;
+ case HIT_SET_GRADE_DECAY_RATE:
+ ss << "hit_set_grade_decay_rate: " <<
+ p->hit_set_grade_decay_rate << "\n";
+ break;
case WRITE_FADVISE_DONTNEED:
ss << "write_fadvise_dontneed: " <<
(p->has_flag(pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED) ?
@@ -4466,7 +4475,8 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
var == "hit_set_count" || var == "hit_set_fpp" ||
var == "target_max_objects" || var == "target_max_bytes" ||
var == "cache_target_full_ratio" || var == "cache_target_dirty_ratio" ||
- var == "cache_min_flush_age" || var == "cache_min_evict_age")) {
+ var == "cache_min_flush_age" || var == "cache_min_evict_age" ||
+ var == "hit_set_grade_decay_rate")) {
ss << "pool '" << poolstr << "' is not a tier pool: variable not applicable";
return -EACCES;
}
@@ -4652,12 +4662,12 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
}
p.hit_set_period = n;
} else if (var == "hit_set_count") {
-
if (interr.length()) {
ss << "error parsing integer value '" << val << "': " << interr;
return -EINVAL;
}
p.hit_set_count = n;
+ p.set_grade(p.hit_set_grade_decay_rate, n);
} else if (var == "hit_set_fpp") {
if (floaterr.length()) {
ss << "error parsing floating point value '" << val << "': " << floaterr;
@@ -4723,6 +4733,17 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
return -EINVAL;
}
p.min_read_recency_for_promote = n;
+ } else if (var == "hit_set_grade_decay_rate") {
+ if (interr.length()) {
+ ss << "error parsing integer value '" << val << "': " << interr;
+ return -EINVAL;
+ }
+ if (n > 100 || n < 0) {
+ ss << "value out of range,valid range is 0 - 100";
+ return -EINVAL;
+ }
+ p.hit_set_grade_decay_rate = n;
+ p.set_grade(n, p.hit_set_count);
} else if (var == "write_fadvise_dontneed") {
if (val == "true" || (interr.empty() && n == 1)) {
p.flags |= pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED;
@@ -6744,6 +6765,62 @@ done:
wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
get_last_committed() + 1));
return true;
+ } else if (prefix == "osd tier cache-measure") {
+ err = check_cluster_features(CEPH_FEATURE_OSD_CACHEPOOL, ss);
+ if (err == -EAGAIN)
+ goto wait;
+ if (err)
+ goto reply;
+ string poolstr;
+ cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
+ int64_t pool_id = osdmap.lookup_pg_pool_name(poolstr);
+ if (pool_id < 0) {
+ ss << "unrecognized pool '" << poolstr << "'";
+ err = -ENOENT;
+ goto reply;
+ }
+ const pg_pool_t *p = osdmap.get_pg_pool(pool_id);
+ assert(p);
+ if (!p->is_tier()) {
+ ss << "pool '" << poolstr << "' is not a tier";
+ err = -EINVAL;
+ goto reply;
+ }
+ string measurestr;
+ cmd_getval(g_ceph_context, cmdmap, "measure", measurestr);
+ pg_pool_t::cache_measure_t measure = pg_pool_t::get_cache_measure_from_str(measurestr);
+ if (measure < 0) {
+ ss << "'" << measurestr << "' is not a valid cache measure";
+ err = -EINVAL;
+ goto reply;
+ }
+ if (p->grade_table.empty()) {
+ ss << "grade_table is empty,set hit_set and hit_set_decay_rate first";
+ err = -EINVAL;
+ goto reply;
+ }
+ if (p->hit_set_params.get_type() == HitSet::TYPE_NONE) {
+ ss << "hit_set_type cannot be none";
+ err = -EINVAL;
+ goto reply;
+ }
+
+ //pool already had this cache-measure set and there are no pending changes
+ if (p->cache_measure == measure &&
+ (pending_inc.new_pools.count(pool_id) == 0 ||
+ pending_inc.new_pools[pool_id].cache_measure == p->cache_measure)) {
+ ss << "set cache-measure for pool '" << poolstr << "'"
+ << " to " << pg_pool_t::get_cache_measure_name(measure);
+ err = 0;
+ goto reply;
+ }
+ pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
+ np->cache_measure = measure;
+ ss << "set cache-measure for pool '" << poolstr
+ << "' to " << pg_pool_t::get_cache_measure_name(measure);
+ wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
+ get_last_committed() + 1));
+ return true;
} else if (prefix == "osd tier add-cache") {
err = check_cluster_features(CEPH_FEATURE_OSD_CACHEPOOL, ss);
if (err == -EAGAIN)
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH 2/5] Mon: expose commands for temperature related setting
2015-05-21 13:34 ` [PATCH 2/5] Mon: expose commands for temperature related setting Li Wang
@ 2015-05-21 14:29 ` Joao Eduardo Luis
2015-05-26 1:52 ` Li Wang
0 siblings, 1 reply; 8+ messages in thread
From: Joao Eduardo Luis @ 2015-05-21 14:29 UTC (permalink / raw)
To: Li Wang, Sage Weil; +Cc: ceph-devel, MingXin Liu
As far as I can tell, this patch can be split in two different patches:
- add hit_set_grade_decay_rate option to 'osd pool set/get'
- add 'osd tier cache-measure'
Also, for the latter we could also use an explanatory commit message.
Aside from that, I don't see anything obviously wrong with the patch.
-Joao
On 05/21/2015 02:34 PM, Li Wang wrote:
> From: MingXin Liu <mingxinliu@ubuntukylin.com>
>
> Signed-off-by: MingXin Liu <mingxinliu@ubuntukylin.com>
> Reviewed-by: Li Wang <liwang@ubuntukylin.com>
> ---
> src/mon/MonCommands.h | 8 +++--
> src/mon/OSDMonitor.cc | 87 ++++++++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 88 insertions(+), 7 deletions(-)
>
> diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
> index 8a36807..b26834d 100644
> --- a/src/mon/MonCommands.h
> +++ b/src/mon/MonCommands.h
> @@ -639,11 +639,11 @@ COMMAND("osd pool rename " \
> "rename <srcpool> to <destpool>", "osd", "rw", "cli,rest")
> COMMAND("osd pool get " \
> "name=pool,type=CephPoolname " \
> - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|all", \
> + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|hit_set_grade_decay_rate|all", \
> "get pool parameter <var>", "osd", "r", "cli,rest")
> COMMAND("osd pool set " \
> "name=pool,type=CephPoolname " \
> - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed " \
> + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed|hit_set_grade_decay_rate " \
> "name=val,type=CephString " \
> "name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
> "set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
> @@ -695,6 +695,10 @@ COMMAND("osd tier cache-mode " \
> "name=pool,type=CephPoolname " \
> "name=mode,type=CephChoices,strings=none|writeback|forward|readonly|readforward|readproxy", \
> "specify the caching mode for cache tier <pool>", "osd", "rw", "cli,rest")
> +COMMAND("osd tier cache-measure " \
> + "name=pool,type=CephPoolname " \
> + "name=measure,type=CephChoices,strings=atime|temperature", \
> + "specify the caching measure to judge hot objects for cache tier <pool>", "osd", "rw", "cli,rest")
> COMMAND("osd tier set-overlay " \
> "name=pool,type=CephPoolname " \
> "name=overlaypool,type=CephPoolname", \
> diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
> index 10597d0..0374778 100644
> --- a/src/mon/OSDMonitor.cc
> +++ b/src/mon/OSDMonitor.cc
> @@ -2803,7 +2803,7 @@ namespace {
> CACHE_TARGET_DIRTY_RATIO, CACHE_TARGET_FULL_RATIO,
> CACHE_MIN_FLUSH_AGE, CACHE_MIN_EVICT_AGE,
> ERASURE_CODE_PROFILE, MIN_READ_RECENCY_FOR_PROMOTE,
> - WRITE_FADVISE_DONTNEED};
> + WRITE_FADVISE_DONTNEED, HIT_SET_GRADE_DECAY_RATE};
>
> std::set<osd_pool_get_choices>
> subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
> @@ -3251,7 +3251,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
> ("cache_min_evict_age", CACHE_MIN_EVICT_AGE)
> ("erasure_code_profile", ERASURE_CODE_PROFILE)
> ("min_read_recency_for_promote", MIN_READ_RECENCY_FOR_PROMOTE)
> - ("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED);
> + ("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED)
> + ("hit_set_grade_decay_rate", HIT_SET_GRADE_DECAY_RATE);
>
> typedef std::set<osd_pool_get_choices> choices_set_t;
>
> @@ -3259,7 +3260,7 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
> (HIT_SET_TYPE)(HIT_SET_PERIOD)(HIT_SET_COUNT)(HIT_SET_FPP)
> (TARGET_MAX_OBJECTS)(TARGET_MAX_BYTES)(CACHE_TARGET_FULL_RATIO)
> (CACHE_TARGET_DIRTY_RATIO)(CACHE_MIN_FLUSH_AGE)(CACHE_MIN_EVICT_AGE)
> - (MIN_READ_RECENCY_FOR_PROMOTE);
> + (MIN_READ_RECENCY_FOR_PROMOTE)(HIT_SET_GRADE_DECAY_RATE);
>
> const choices_set_t ONLY_ERASURE_CHOICES = boost::assign::list_of
> (ERASURE_CODE_PROFILE);
> @@ -3389,6 +3390,10 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
> f->dump_int("min_read_recency_for_promote",
> p->min_read_recency_for_promote);
> break;
> + case HIT_SET_GRADE_DECAY_RATE:
> + f->dump_int("hit_set_priority_decacy_rate",
> + p->hit_set_grade_decay_rate);
> + break;
> case WRITE_FADVISE_DONTNEED:
> f->dump_string("write_fadvise_dontneed",
> p->has_flag(pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED) ?
> @@ -3476,6 +3481,10 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
> ss << "min_read_recency_for_promote: " <<
> p->min_read_recency_for_promote << "\n";
> break;
> + case HIT_SET_GRADE_DECAY_RATE:
> + ss << "hit_set_grade_decay_rate: " <<
> + p->hit_set_grade_decay_rate << "\n";
> + break;
> case WRITE_FADVISE_DONTNEED:
> ss << "write_fadvise_dontneed: " <<
> (p->has_flag(pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED) ?
> @@ -4466,7 +4475,8 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
> var == "hit_set_count" || var == "hit_set_fpp" ||
> var == "target_max_objects" || var == "target_max_bytes" ||
> var == "cache_target_full_ratio" || var == "cache_target_dirty_ratio" ||
> - var == "cache_min_flush_age" || var == "cache_min_evict_age")) {
> + var == "cache_min_flush_age" || var == "cache_min_evict_age" ||
> + var == "hit_set_grade_decay_rate")) {
> ss << "pool '" << poolstr << "' is not a tier pool: variable not applicable";
> return -EACCES;
> }
> @@ -4652,12 +4662,12 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
> }
> p.hit_set_period = n;
> } else if (var == "hit_set_count") {
> -
> if (interr.length()) {
> ss << "error parsing integer value '" << val << "': " << interr;
> return -EINVAL;
> }
> p.hit_set_count = n;
> + p.set_grade(p.hit_set_grade_decay_rate, n);
> } else if (var == "hit_set_fpp") {
> if (floaterr.length()) {
> ss << "error parsing floating point value '" << val << "': " << floaterr;
> @@ -4723,6 +4733,17 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
> return -EINVAL;
> }
> p.min_read_recency_for_promote = n;
> + } else if (var == "hit_set_grade_decay_rate") {
> + if (interr.length()) {
> + ss << "error parsing integer value '" << val << "': " << interr;
> + return -EINVAL;
> + }
> + if (n > 100 || n < 0) {
> + ss << "value out of range,valid range is 0 - 100";
> + return -EINVAL;
> + }
> + p.hit_set_grade_decay_rate = n;
> + p.set_grade(n, p.hit_set_count);
> } else if (var == "write_fadvise_dontneed") {
> if (val == "true" || (interr.empty() && n == 1)) {
> p.flags |= pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED;
> @@ -6744,6 +6765,62 @@ done:
> wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
> get_last_committed() + 1));
> return true;
> + } else if (prefix == "osd tier cache-measure") {
> + err = check_cluster_features(CEPH_FEATURE_OSD_CACHEPOOL, ss);
> + if (err == -EAGAIN)
> + goto wait;
> + if (err)
> + goto reply;
> + string poolstr;
> + cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
> + int64_t pool_id = osdmap.lookup_pg_pool_name(poolstr);
> + if (pool_id < 0) {
> + ss << "unrecognized pool '" << poolstr << "'";
> + err = -ENOENT;
> + goto reply;
> + }
> + const pg_pool_t *p = osdmap.get_pg_pool(pool_id);
> + assert(p);
> + if (!p->is_tier()) {
> + ss << "pool '" << poolstr << "' is not a tier";
> + err = -EINVAL;
> + goto reply;
> + }
> + string measurestr;
> + cmd_getval(g_ceph_context, cmdmap, "measure", measurestr);
> + pg_pool_t::cache_measure_t measure = pg_pool_t::get_cache_measure_from_str(measurestr);
> + if (measure < 0) {
> + ss << "'" << measurestr << "' is not a valid cache measure";
> + err = -EINVAL;
> + goto reply;
> + }
> + if (p->grade_table.empty()) {
> + ss << "grade_table is empty,set hit_set and hit_set_decay_rate first";
> + err = -EINVAL;
> + goto reply;
> + }
> + if (p->hit_set_params.get_type() == HitSet::TYPE_NONE) {
> + ss << "hit_set_type cannot be none";
> + err = -EINVAL;
> + goto reply;
> + }
> +
> + //pool already had this cache-measure set and there are no pending changes
> + if (p->cache_measure == measure &&
> + (pending_inc.new_pools.count(pool_id) == 0 ||
> + pending_inc.new_pools[pool_id].cache_measure == p->cache_measure)) {
> + ss << "set cache-measure for pool '" << poolstr << "'"
> + << " to " << pg_pool_t::get_cache_measure_name(measure);
> + err = 0;
> + goto reply;
> + }
> + pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
> + np->cache_measure = measure;
> + ss << "set cache-measure for pool '" << poolstr
> + << "' to " << pg_pool_t::get_cache_measure_name(measure);
> + wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
> + get_last_committed() + 1));
> + return true;
> } else if (prefix == "osd tier add-cache") {
> err = check_cluster_features(CEPH_FEATURE_OSD_CACHEPOOL, ss);
> if (err == -EAGAIN)
>
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH 2/5] Mon: expose commands for temperature related setting
2015-05-21 14:29 ` Joao Eduardo Luis
@ 2015-05-26 1:52 ` Li Wang
0 siblings, 0 replies; 8+ messages in thread
From: Li Wang @ 2015-05-26 1:52 UTC (permalink / raw)
To: Joao Eduardo Luis, Sage Weil; +Cc: ceph-devel, MingXin Liu
Thanks for reviewing. We will update the patch and add the Reviewed-by
tag after the patch is accepted. The follow-up discussion is at
https://github.com/ceph/ceph/pull/4737
On 2015/5/22 8:55, Joao Eduardo Luis wrote:
> As far as I can tell, this patch can be split in two different patches:
>
> - add hit_set_grade_decay_rate option to 'osd pool set/get'
> - add 'osd tier cache-measure'
>
> Also, for the latter we could also use an explanatory commit message.
>
> Aside from that, I don't see anything obviously wrong with the patch.
>
> -Joao
>
> On 05/21/2015 02:34 PM, Li Wang wrote:
>> From: MingXin Liu <mingxinliu@ubuntukylin.com>
>>
>> Signed-off-by: MingXin Liu <mingxinliu@ubuntukylin.com>
>> Reviewed-by: Li Wang <liwang@ubuntukylin.com>
>> ---
>> src/mon/MonCommands.h | 8 +++--
>> src/mon/OSDMonitor.cc | 87 ++++++++++++++++++++++++++++++++++++++++++++++++---
>> 2 files changed, 88 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
>> index 8a36807..b26834d 100644
>> --- a/src/mon/MonCommands.h
>> +++ b/src/mon/MonCommands.h
>> @@ -639,11 +639,11 @@ COMMAND("osd pool rename " \
>> "rename <srcpool> to <destpool>", "osd", "rw", "cli,rest")
>> COMMAND("osd pool get " \
>> "name=pool,type=CephPoolname " \
>> - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|all", \
>> + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|hit_set_grade_decay_rate|all", \
>> "get pool parameter <var>", "osd", "r", "cli,rest")
>> COMMAND("osd pool set " \
>> "name=pool,type=CephPoolname " \
>> - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed " \
>> + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed|hit_set_grade_decay_rate " \
>> "name=val,type=CephString " \
>> "name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \
>> "set pool parameter <var> to <val>", "osd", "rw", "cli,rest")
>> @@ -695,6 +695,10 @@ COMMAND("osd tier cache-mode " \
>> "name=pool,type=CephPoolname " \
>> "name=mode,type=CephChoices,strings=none|writeback|forward|readonly|readforward|readproxy", \
>> "specify the caching mode for cache tier <pool>", "osd", "rw", "cli,rest")
>> +COMMAND("osd tier cache-measure " \
>> + "name=pool,type=CephPoolname " \
>> + "name=measure,type=CephChoices,strings=atime|temperature", \
>> + "specify the caching measure to judge hot objects for cache tier <pool>", "osd", "rw", "cli,rest")
>> COMMAND("osd tier set-overlay " \
>> "name=pool,type=CephPoolname " \
>> "name=overlaypool,type=CephPoolname", \
>> diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
>> index 10597d0..0374778 100644
>> --- a/src/mon/OSDMonitor.cc
>> +++ b/src/mon/OSDMonitor.cc
>> @@ -2803,7 +2803,7 @@ namespace {
>> CACHE_TARGET_DIRTY_RATIO, CACHE_TARGET_FULL_RATIO,
>> CACHE_MIN_FLUSH_AGE, CACHE_MIN_EVICT_AGE,
>> ERASURE_CODE_PROFILE, MIN_READ_RECENCY_FOR_PROMOTE,
>> - WRITE_FADVISE_DONTNEED};
>> + WRITE_FADVISE_DONTNEED, HIT_SET_GRADE_DECAY_RATE};
>>
>> std::set<osd_pool_get_choices>
>> subtract_second_from_first(const std::set<osd_pool_get_choices>& first,
>> @@ -3251,7 +3251,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
>> ("cache_min_evict_age", CACHE_MIN_EVICT_AGE)
>> ("erasure_code_profile", ERASURE_CODE_PROFILE)
>> ("min_read_recency_for_promote", MIN_READ_RECENCY_FOR_PROMOTE)
>> - ("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED);
>> + ("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED)
>> + ("hit_set_grade_decay_rate", HIT_SET_GRADE_DECAY_RATE);
>>
>> typedef std::set<osd_pool_get_choices> choices_set_t;
>>
>> @@ -3259,7 +3260,7 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
>> (HIT_SET_TYPE)(HIT_SET_PERIOD)(HIT_SET_COUNT)(HIT_SET_FPP)
>> (TARGET_MAX_OBJECTS)(TARGET_MAX_BYTES)(CACHE_TARGET_FULL_RATIO)
>> (CACHE_TARGET_DIRTY_RATIO)(CACHE_MIN_FLUSH_AGE)(CACHE_MIN_EVICT_AGE)
>> - (MIN_READ_RECENCY_FOR_PROMOTE);
>> + (MIN_READ_RECENCY_FOR_PROMOTE)(HIT_SET_GRADE_DECAY_RATE);
>>
>> const choices_set_t ONLY_ERASURE_CHOICES = boost::assign::list_of
>> (ERASURE_CODE_PROFILE);
>> @@ -3389,6 +3390,10 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
>> f->dump_int("min_read_recency_for_promote",
>> p->min_read_recency_for_promote);
>> break;
>> + case HIT_SET_GRADE_DECAY_RATE:
>> + f->dump_int("hit_set_priority_decacy_rate",
>> + p->hit_set_grade_decay_rate);
>> + break;
>> case WRITE_FADVISE_DONTNEED:
>> f->dump_string("write_fadvise_dontneed",
>> p->has_flag(pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED) ?
>> @@ -3476,6 +3481,10 @@ bool OSDMonitor::preprocess_command(MMonCommand *m)
>> ss << "min_read_recency_for_promote: " <<
>> p->min_read_recency_for_promote << "\n";
>> break;
>> + case HIT_SET_GRADE_DECAY_RATE:
>> + ss << "hit_set_grade_decay_rate: " <<
>> + p->hit_set_grade_decay_rate << "\n";
>> + break;
>> case WRITE_FADVISE_DONTNEED:
>> ss << "write_fadvise_dontneed: " <<
>> (p->has_flag(pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED) ?
>> @@ -4466,7 +4475,8 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
>> var == "hit_set_count" || var == "hit_set_fpp" ||
>> var == "target_max_objects" || var == "target_max_bytes" ||
>> var == "cache_target_full_ratio" || var == "cache_target_dirty_ratio" ||
>> - var == "cache_min_flush_age" || var == "cache_min_evict_age")) {
>> + var == "cache_min_flush_age" || var == "cache_min_evict_age" ||
>> + var == "hit_set_grade_decay_rate")) {
>> ss << "pool '" << poolstr << "' is not a tier pool: variable not applicable";
>> return -EACCES;
>> }
>> @@ -4652,12 +4662,12 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
>> }
>> p.hit_set_period = n;
>> } else if (var == "hit_set_count") {
>> -
>> if (interr.length()) {
>> ss << "error parsing integer value '" << val << "': " << interr;
>> return -EINVAL;
>> }
>> p.hit_set_count = n;
>> + p.set_grade(p.hit_set_grade_decay_rate, n);
>> } else if (var == "hit_set_fpp") {
>> if (floaterr.length()) {
>> ss << "error parsing floating point value '" << val << "': " << floaterr;
>> @@ -4723,6 +4733,17 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap,
>> return -EINVAL;
>> }
>> p.min_read_recency_for_promote = n;
>> + } else if (var == "hit_set_grade_decay_rate") {
>> + if (interr.length()) {
>> + ss << "error parsing integer value '" << val << "': " << interr;
>> + return -EINVAL;
>> + }
>> + if (n > 100 || n < 0) {
>> + ss << "value out of range,valid range is 0 - 100";
>> + return -EINVAL;
>> + }
>> + p.hit_set_grade_decay_rate = n;
>> + p.set_grade(n, p.hit_set_count);
>> } else if (var == "write_fadvise_dontneed") {
>> if (val == "true" || (interr.empty() && n == 1)) {
>> p.flags |= pg_pool_t::FLAG_WRITE_FADVISE_DONTNEED;
>> @@ -6744,6 +6765,62 @@ done:
>> wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
>> get_last_committed() + 1));
>> return true;
>> + } else if (prefix == "osd tier cache-measure") {
>> + err = check_cluster_features(CEPH_FEATURE_OSD_CACHEPOOL, ss);
>> + if (err == -EAGAIN)
>> + goto wait;
>> + if (err)
>> + goto reply;
>> + string poolstr;
>> + cmd_getval(g_ceph_context, cmdmap, "pool", poolstr);
>> + int64_t pool_id = osdmap.lookup_pg_pool_name(poolstr);
>> + if (pool_id < 0) {
>> + ss << "unrecognized pool '" << poolstr << "'";
>> + err = -ENOENT;
>> + goto reply;
>> + }
>> + const pg_pool_t *p = osdmap.get_pg_pool(pool_id);
>> + assert(p);
>> + if (!p->is_tier()) {
>> + ss << "pool '" << poolstr << "' is not a tier";
>> + err = -EINVAL;
>> + goto reply;
>> + }
>> + string measurestr;
>> + cmd_getval(g_ceph_context, cmdmap, "measure", measurestr);
>> + pg_pool_t::cache_measure_t measure = pg_pool_t::get_cache_measure_from_str(measurestr);
>> + if (measure < 0) {
>> + ss << "'" << measurestr << "' is not a valid cache measure";
>> + err = -EINVAL;
>> + goto reply;
>> + }
>> + if (p->grade_table.empty()) {
>> + ss << "grade_table is empty,set hit_set and hit_set_decay_rate first";
>> + err = -EINVAL;
>> + goto reply;
>> + }
>> + if (p->hit_set_params.get_type() == HitSet::TYPE_NONE) {
>> + ss << "hit_set_type cannot be none";
>> + err = -EINVAL;
>> + goto reply;
>> + }
>> +
>> + //pool already had this cache-measure set and there are no pending changes
>> + if (p->cache_measure == measure &&
>> + (pending_inc.new_pools.count(pool_id) == 0 ||
>> + pending_inc.new_pools[pool_id].cache_measure == p->cache_measure)) {
>> + ss << "set cache-measure for pool '" << poolstr << "'"
>> + << " to " << pg_pool_t::get_cache_measure_name(measure);
>> + err = 0;
>> + goto reply;
>> + }
>> + pg_pool_t *np = pending_inc.get_new_pool(pool_id, p);
>> + np->cache_measure = measure;
>> + ss << "set cache-measure for pool '" << poolstr
>> + << "' to " << pg_pool_t::get_cache_measure_name(measure);
>> + wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
>> + get_last_committed() + 1));
>> + return true;
>> } else if (prefix == "osd tier add-cache") {
>> err = check_cluster_features(CEPH_FEATURE_OSD_CACHEPOOL, ss);
>> if (err == -EAGAIN)
>>
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 3/5] Osd: add a temperature based object eviction policy for cache tiering
2015-05-21 13:34 [PATCH] Osd: temperature based object eviction for cache tiering Li Wang
2015-05-21 13:34 ` [PATCH 1/5] Osd: add three fields to pg_pool_t Li Wang
2015-05-21 13:34 ` [PATCH 2/5] Mon: expose commands for temperature related setting Li Wang
@ 2015-05-21 13:34 ` Li Wang
2015-05-21 13:34 ` [PATCH 4/5] Mon: add temperature support for existing cache related commands Li Wang
2015-05-21 13:34 ` [PATCH 5/5] Doc: add temperature related stuff in documents and test scripts Li Wang
4 siblings, 0 replies; 8+ messages in thread
From: Li Wang @ 2015-05-21 13:34 UTC (permalink / raw)
To: Sage Weil; +Cc: ceph-devel, MingXin Liu
From: MingXin Liu <mingxinliu@ubuntukylin.com>
Signed-off-by: MingXin Liu <mingxinliu@ubuntukylin.com>
Reviewed-by: Li Wang <liwang@ubuntukylin.com>
---
src/osd/ReplicatedPG.cc | 110 +++++++++++++++++++++++++-----------------------
1 file changed, 58 insertions(+), 52 deletions(-)
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 4c549a5..b2d49c6 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -10822,44 +10822,45 @@ bool ReplicatedPG::agent_maybe_evict(ObjectContextRef& obc)
}
}
+ int atime = -1, temp = 0;
+ uint64_t atime_upper = 0, atime_lower = 0;
+ uint64_t temp_upper = 0, temp_lower = 0;
+
if (agent_state->evict_mode != TierAgentState::EVICT_MODE_FULL) {
// is this object old and/or cold enough?
- int atime = -1, temp = 0;
- if (hit_set)
- agent_estimate_atime_temp(soid, &atime, NULL /*FIXME &temp*/);
-
- uint64_t atime_upper = 0, atime_lower = 0;
- if (atime < 0 && obc->obs.oi.mtime != utime_t()) {
- if (obc->obs.oi.local_mtime != utime_t()) {
- atime = ceph_clock_now(NULL).sec() - obc->obs.oi.local_mtime;
- } else {
- atime = ceph_clock_now(NULL).sec() - obc->obs.oi.mtime;
+ if (pool.info.cache_measure == pg_pool_t::CACHEMEASURE_ATIME) {
+ agent_estimate_atime_temp(soid, &atime, NULL);
+
+ if (atime < 0 && obc->obs.oi.mtime != utime_t()) {
+ if (obc->obs.oi.local_mtime != utime_t()) {
+ atime = ceph_clock_now(NULL).sec() - obc->obs.oi.local_mtime;
+ } else {
+ atime = ceph_clock_now(NULL).sec() - obc->obs.oi.mtime;
+ }
}
- }
- if (atime < 0) {
- if (hit_set) {
- atime = pool.info.hit_set_period * pool.info.hit_set_count; // "infinite"
- } else {
- atime_upper = 1000000;
+ if (atime < 0) {
+ if (hit_set) {
+ atime = pool.info.hit_set_period * pool.info.hit_set_count; // "infinite"
+ } else {
+ atime_upper = 1000000;
+ }
}
- }
- if (atime >= 0) {
- agent_state->atime_hist.add(atime);
- agent_state->atime_hist.get_position_micro(atime, &atime_lower,
+ if (atime >= 0) {
+ agent_state->atime_hist.add(atime);
+ agent_state->atime_hist.get_position_micro(atime, &atime_lower,
&atime_upper);
- }
+ }
+ } else {
+ agent_estimate_atime_temp(soid, NULL, &temp);
- unsigned temp_upper = 0, temp_lower = 0;
- /*
- // FIXME: bound atime based on creation time?
- agent_state->temp_hist.add(atime);
- agent_state->temp_hist.get_position_micro(temp, &temp_lower, &temp_upper);
- */
+ agent_state->temp_hist.add(temp);
+ agent_state->temp_hist.get_position_micro(temp, &temp_lower, &temp_upper);
+ }
dout(20) << __func__
- << " atime " << atime
+ << " atime " << atime
<< " pos " << atime_lower << "-" << atime_upper
- << ", temp " << temp
+ <<" temp " << temp
<< " pos " << temp_lower << "-" << temp_upper
<< ", evict_effort " << agent_state->evict_effort
<< dendl;
@@ -10872,9 +10873,10 @@ bool ReplicatedPG::agent_maybe_evict(ObjectContextRef& obc)
delete f;
*_dout << dendl;
- // FIXME: ignore temperature for now.
-
- if (1000000 - atime_upper >= agent_state->evict_effort)
+ if (pool.info.cache_measure == pg_pool_t::CACHEMEASURE_ATIME) {
+ if(1000000 - atime_upper >= agent_state->evict_effort)
+ return false;
+ } else if (temp_lower >= agent_state->evict_effort)
return false;
}
@@ -11124,29 +11126,33 @@ void ReplicatedPG::agent_estimate_atime_temp(const hobject_t& oid,
int *atime, int *temp)
{
assert(hit_set);
- *atime = -1;
- if (temp)
- *temp = 0;
- if (hit_set->contains(oid)) {
- *atime = 0;
- if (temp)
- ++(*temp);
- else
+ if (atime) {
+ *atime = -1;
+ if (hit_set->contains(oid)) {
+ *atime = 0;
return;
- }
- time_t now = ceph_clock_now(NULL).sec();
- for (map<time_t,HitSetRef>::reverse_iterator p =
- agent_state->hit_set_map.rbegin();
- p != agent_state->hit_set_map.rend();
- ++p) {
- if (p->second->contains(oid)) {
- if (*atime < 0)
- *atime = now - p->first;
- if (temp)
- ++(*temp);
- else
+ }
+ time_t now = ceph_clock_now(NULL).sec();
+ for (map<time_t,HitSetRef>::reverse_iterator p =
+ agent_state->hit_set_map.rbegin();
+ p != agent_state->hit_set_map.rend(); ++p) {
+ if (p->second->contains(oid))
+ *atime = now - p->first;
+ if (*atime >= 0)
return;
}
+ } else if (temp) {
+ *temp = 0;
+ unsigned i = 0;
+ if (hit_set->contains(oid))
+ *temp += pool.info.get_grade(0);
+ for (map<time_t,HitSetRef>::reverse_iterator p =
+ agent_state->hit_set_map.rbegin();
+ p != agent_state->hit_set_map.rend(); ++p) {
+ ++i;
+ if (p->second->contains(oid))
+ *temp += pool.info.get_grade(i);
+ }
}
}
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/5] Mon: add temperature support for existing cache related commands
2015-05-21 13:34 [PATCH] Osd: temperature based object eviction for cache tiering Li Wang
` (2 preceding siblings ...)
2015-05-21 13:34 ` [PATCH 3/5] Osd: add a temperature based object eviction policy for cache tiering Li Wang
@ 2015-05-21 13:34 ` Li Wang
2015-05-21 13:34 ` [PATCH 5/5] Doc: add temperature related stuff in documents and test scripts Li Wang
4 siblings, 0 replies; 8+ messages in thread
From: Li Wang @ 2015-05-21 13:34 UTC (permalink / raw)
To: Sage Weil; +Cc: ceph-devel, MingXin Liu
From: MingXin Liu <mingxinliu@ubuntukylin.com>
Signed-off-by: MingXin Liu <mingxinliu@ubuntukylin.com>
Reviewed-by: Li Wang <liwang@ubuntukylin.com>
---
src/common/config_opts.h | 2 ++
src/mon/OSDMonitor.cc | 14 +++++++++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index e79eeaa..f661cbc 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -550,10 +550,12 @@ OPTION(osd_hit_set_max_size, OPT_INT, 100000) // max target size for a HitSet
OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking
OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback")
+OPTION(osd_tier_default_cache_measure, OPT_STR, "atime")
OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4)
OPTION(osd_tier_default_cache_hit_set_period, OPT_INT, 1200)
OPTION(osd_tier_default_cache_hit_set_type, OPT_STR, "bloom")
OPTION(osd_tier_default_cache_min_read_recency_for_promote, OPT_INT, 1) // number of recent HitSets the object must appear in to be promoted (on read)
+OPTION(osd_tier_default_cache_hit_set_grade_decay_rate, OPT_INT, 50)
OPTION(osd_map_dedup, OPT_BOOL, true)
OPTION(osd_map_max_advance, OPT_INT, 200) // make this < cache_size!
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 0374778..ac84b56 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2723,13 +2723,15 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
p != osdmap.pools.end();
++p) {
const pg_pool_t& info = p->second;
- if (info.cache_mode_requires_hit_set() &&
+ if ((info.cache_mode_requires_hit_set() ||
+ info.cache_measure == pg_pool_t::CACHEMEASURE_TEMP) &&
info.hit_set_params.get_type() == HitSet::TYPE_NONE) {
++problem_cache_pools;
if (detail) {
ostringstream ss;
ss << "pool '" << osdmap.get_pool_name(p->first)
<< "' with cache_mode " << info.get_cache_mode_name()
+ << " cache_measure " << info.get_cache_measure_name()
<< " needs hit_set_type to be set but it is not";
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
}
@@ -6874,6 +6876,13 @@ done:
err = -EINVAL;
goto reply;
}
+ string measurestr = g_conf->osd_tier_default_cache_measure;
+ pg_pool_t::cache_measure_t measure = pg_pool_t::get_cache_measure_from_str(measurestr);
+ if (measure < 0) {
+ ss << "osd tier cache default measure '" << measurestr << "' is not a valid cache measure";
+ err = -EINVAL;
+ goto reply;
+ }
HitSet::Params hsp;
if (g_conf->osd_tier_default_cache_hit_set_type == "bloom") {
BloomHitSet::Params *bsp = new BloomHitSet::Params;
@@ -6902,11 +6911,14 @@ done:
np->set_snap_epoch(pending_inc.epoch); // tier will update to our snap info
ntp->tier_of = pool_id;
ntp->cache_mode = mode;
+ ntp->cache_measure = measure;
ntp->hit_set_count = g_conf->osd_tier_default_cache_hit_set_count;
ntp->hit_set_period = g_conf->osd_tier_default_cache_hit_set_period;
ntp->min_read_recency_for_promote = g_conf->osd_tier_default_cache_min_read_recency_for_promote;
+ ntp->hit_set_grade_decay_rate = g_conf->osd_tier_default_cache_hit_set_grade_decay_rate;
ntp->hit_set_params = hsp;
ntp->target_max_bytes = size;
+ ntp->set_grade(ntp->hit_set_grade_decay_rate, ntp->hit_set_count);
ss << "pool '" << tierpoolstr << "' is now (or already was) a cache tier of '" << poolstr << "'";
wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, ss.str(),
get_last_committed() + 1));
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 5/5] Doc: add temperature related stuff in documents and test scripts
2015-05-21 13:34 [PATCH] Osd: temperature based object eviction for cache tiering Li Wang
` (3 preceding siblings ...)
2015-05-21 13:34 ` [PATCH 4/5] Mon: add temperature support for existing cache related commands Li Wang
@ 2015-05-21 13:34 ` Li Wang
4 siblings, 0 replies; 8+ messages in thread
From: Li Wang @ 2015-05-21 13:34 UTC (permalink / raw)
To: Sage Weil; +Cc: ceph-devel, MingXin Liu
From: MingXin Liu <mingxinliu@ubuntukylin.com>
Signed-off-by: MingXin Liu <mingxinliu@ubuntukylin.com>
Reviewed-by: Li Wang <liwang@ubuntukylin.com>
---
doc/dev/cache-pool.rst | 4 ++++
doc/man/8/ceph.rst | 12 +++++++++---
doc/rados/operations/pools.rst | 7 +++++++
qa/workunits/cephtool/test.sh | 14 ++++++++++++++
4 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/doc/dev/cache-pool.rst b/doc/dev/cache-pool.rst
index f44cbd9..d3b6257 100644
--- a/doc/dev/cache-pool.rst
+++ b/doc/dev/cache-pool.rst
@@ -179,5 +179,9 @@ the cache tier::
ceph osd pool set foo-hot cache_min_evict_age 1800 # 30 minutes
+You can specify the object eviction policy (cache-measure). When cache-measure is set to atime,
+the most recently accessed objects are considered hotter than others; when temperature is used as
+the measure, the agent considers both access time and access frequency::
+ ceph osd tier cache-measure foo-hot <atime|temperature>
diff --git a/doc/man/8/ceph.rst b/doc/man/8/ceph.rst
index f950221..53133d8 100644
--- a/doc/man/8/ceph.rst
+++ b/doc/man/8/ceph.rst
@@ -45,7 +45,7 @@ Synopsis
| **ceph** **osd** **pool** [ *create* \| *delete* \| *get* \| *get-quota* \| *ls* \| *mksnap* \| *rename* \| *rmsnap* \| *set* \| *set-quota* \| *stats* ] ...
-| **ceph** **osd** **tier** [ *add* \| *add-cache* \| *cache-mode* \| *remove* \| *remove-overlay* \| *set-overlay* ] ...
+| **ceph** **osd** **tier** [ *add* \| *add-cache* \| *cache-mode* \| *cache-measure* \| *remove* \| *remove-overlay* \| *set-overlay* ] ...
| **ceph** **pg** [ *debug* \| *deep-scrub* \| *dump* \| *dump_json* \| *dump_pools_json* \| *dump_stuck* \| *force_create_pg* \| *getmap* \| *ls* \| *ls-by-osd* \| *ls-by-pool* \| *ls-by-primary* \| *map* \| *repair* \| *scrub* \| *send_pg_creates* \| *set_full_ratio* \| *set_nearfull_ratio* \| *stat* ] ...
@@ -878,7 +878,7 @@ Only for tiered pools::
ceph osd pool get <poolname> hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|
target_max_objects|target_max_bytes|cache_target_dirty_ratio|
cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|
- min_read_recency_for_promote
+ min_read_recency_for_promote|hit_set_grade_decay_rate
Only for erasure coded pools::
@@ -927,7 +927,7 @@ Usage::
hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|debug_fake_ec_pool|
target_max_bytes|target_max_objects|cache_target_dirty_ratio|
cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|
- min_read_recency_for_promote|write_fadvise_dontneed
+ min_read_recency_for_promote|write_fadvise_dontneed|hit_set_grade_decay_rate
<val> {--yes-i-really-mean-it}
Subcommand ``set-quota`` sets object or byte limit on pool.
@@ -1049,6 +1049,12 @@ Usage::
ceph osd tier cache-mode <poolname> none|writeback|forward|readonly|
readforward|readproxy
+Subcommand ``cache-measure`` specifies the caching measure for cache tier <pool>.
+
+Usage::
+
+ ceph osd tier cache-measure <poolname> atime|temperature
+
Subcommand ``remove`` removes the tier <tierpool> (the second one) from base pool
<pool> (the first one).
diff --git a/doc/rados/operations/pools.rst b/doc/rados/operations/pools.rst
index 36b9c94..2c6deab 100644
--- a/doc/rados/operations/pools.rst
+++ b/doc/rados/operations/pools.rst
@@ -374,6 +374,13 @@ You may set values for the following keys:
:Example: ``1000000`` #1M objects
+``hit_set_grade_decay_rate``
+:Description: Temperature grade decay rate between a hit_set and the following one
+:Type: Integer
+:Valid Range: 0 - 100
+:Default: ``50``
+
+
``cache_min_flush_age``
:Description: The time (in seconds) before the cache tiering agent will flush
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh
index 15d4e73..c51592a 100755
--- a/qa/workunits/cephtool/test.sh
+++ b/qa/workunits/cephtool/test.sh
@@ -415,6 +415,20 @@ function test_tiering()
ceph osd pool delete cache5 cache5 --yes-i-really-really-mean-it
ceph osd pool delete basepoolB basepoolB --yes-i-really-really-mean-it
ceph osd pool delete basepoolA basepoolA --yes-i-really-really-mean-it
+
+ #cache-measure
+ ceph osd pool create Mbase1 2
+ ceph osd pool create Mcache1 2
+ ceph osd tier add Mbase1 Mcache1
+ ceph osd pool set Mcache1 hit_set_type bloom
+ ceph osd pool set Mcache1 hit_set_count 4
+ ceph osd pool set Mcache1 hit_set_period 1200
+ ceph osd pool set Mcache1 hit_set_grade_decay_rate 3
+ ceph osd tier cache-mode writeback
+ ceph osd tier cache-measure temperature
+ ceph osd tier set-overlay Mbase1 Mcache1
+ ceph osd tier cache-measure atime
+ ceph osd tier cache-measure temperature
}
function test_auth()
--
1.9.1
^ permalink raw reply related [flat|nested] 8+ messages in thread