{"id":1682,"date":"2020-08-17T21:51:59","date_gmt":"2020-08-17T13:51:59","guid":{"rendered":"https:\/\/blog.dynox.cn\/?p=1682"},"modified":"2023-08-07T00:54:14","modified_gmt":"2023-08-06T16:54:14","slug":"%e9%80%9a%e8%bf%87tsc%e8%a7%82%e5%af%9fcpu","status":"publish","type":"post","link":"https:\/\/blog.dynox.cn\/?p=1682","title":{"rendered":"\u901a\u8fc7TSC\u89c2\u5bdfCPU"},"content":{"rendered":"<div class=\"gruber-markdown\"><p>\u81eaPentium\u5f00\u59cbx86 CPU\u5747\u5f15\u5165TSC\u4e86\uff0c\u53ef\u63d0\u4f9b\u6307\u4ee4\u7ea7\u6267\u884c\u65f6\u95f4\u5ea6\u91cf\u768464\u4f4d\u65f6\u95f4\u6233\u8ba1\u6570\u5bc4\u5b58\u5668\uff0c\u968f\u7740CPU\u65f6\u949f\u81ea\u52a8\u589e\u52a0\u3002<\/p>\n<h3>CPU\u6307\u4ee4\uff1a<\/h3>\n<p>rdtsc: Read Time-Stamp Counter\nrdtscp: Read Time-Stamp Counter and Processor ID<\/p>\n<h3>\u8c03\u7528:<\/h3>\n<p><strong>Microsoft Visual C++:<\/strong><\/p>\n<pre><code class=\"prettyprint\" class=\"language-c\">unsigned __int64 __rdtsc();\nunsigned __int64 __rdtscp( unsigned int * AUX );<\/code><\/pre>\n<p><strong>Linux &amp; gcc :<\/strong><\/p>\n<pre><code class=\"prettyprint\" class=\"language-c\">extern __inline unsigned long long\n__attribute__((__gnu_inline__, __always_inline__, __artificial__))\n__rdtsc (void) {\n  return __builtin_ia32_rdtsc ();\n}\nextern __inline unsigned long long\n__attribute__((__gnu_inline__, __always_inline__, __artificial__))\n__rdtscp (unsigned int *__A)\n{\n  return __builtin_ia32_rdtscp (__A);\n}<\/code><\/pre>\n<h3>\u793a\u4f8b\uff1a<\/h3>\n<p>1: L1 cache\u53ca\u5185\u5b58\u7684\u5ef6\u8fdf\u6d4b\u91cf:<\/p>\n<p>\u4ee3\u7801\uff1a<\/p>\n<pre><code class=\"prettyprint\" class=\"language-c\">{\n    ......\n    \/* flush cache line *\/\n    _mm_clflush(&amp;data[0]);\n\n    \/* measure cache miss latency *\/\n    ts = rdtscp(&amp;ui);\n    m |= data[0];\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci[0], ts, te);\n\n    \/* measure cache hit latency *\/\n    ts = rdtscp(&amp;ui);\n    m 
&amp;= data[0];\n    te = rdtscp(&amp;ui);\n      \/* flush cache line *\/\n    _mm_clflush(&amp;data[0]);\n\n    \/* measure cache miss latency *\/\n    ts = rdtscp(&amp;ui);\n    m |= data[0];\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci[0], ts, te);\n\n    \/* measure cache hit latency *\/\n    ts = rdtscp(&amp;ui);\n    m &amp;= data[0];\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci[1], ts, te);\n    CALC_MIN(csci[1], ts, te);\n}<\/code><\/pre>\n<p>\u7ed3\u679c\uff1a<\/p>\n<p>rdtscp\u6307\u4ee4\u81ea\u8eab\u8017\u65f6\uff1aX86: 31 X64: 33<\/p>\n<table>\n<thead>\n<tr>\n<th>\u67b6\u6784<\/th>\n<th>X86<\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<th>X64<\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u957f\u5ea6<\/td>\n<td>BYTE<\/td>\n<td>WORD<\/td>\n<td>DWORD<\/td>\n<td>QWORD<\/td>\n<td>BYTE<\/td>\n<td>WORD<\/td>\n<td>DWORD<\/td>\n<td>QWORD<\/td>\n<\/tr>\n<tr>\n<td>\u51b7\uff1a\u5185\u5b58<\/td>\n<td>244<\/td>\n<td>241<\/td>\n<td>246<\/td>\n<td>250<\/td>\n<td>254<\/td>\n<td>254<\/td>\n<td>260<\/td>\n<td>261<\/td>\n<\/tr>\n<tr>\n<td>\u70ed\uff1aL1<\/td>\n<td>31<\/td>\n<td>31<\/td>\n<td>31<\/td>\n<td>31<\/td>\n<td>35<\/td>\n<td>35<\/td>\n<td>35<\/td>\n<td>35<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u95ee\u9898\uff1a\u8bfb\u53d61\u4e2a\u5b57\u8282\u4e0e8\u4e2a\u5b57\u8282\u7684\u6240\u7528\u7684\u65f6\u95f4\u662f\u4e00\u6837\u7684\uff0c\u4e3a\u4ec0\u4e48\uff1f<\/p>\n<p>2: \u5e38\u89c1\u6574\u578b\u8fd0\u7b97\u53ca\u591a\u6761\u6307\u4ee4\u6267\u884c\u5468\u671f:<\/p>\n<p>\u4ee3\u7801\uff1a<\/p>\n<pre><code class=\"prettyprint\" class=\"language-c\">{\n    ......\n    \/* measure mul latency *\/\n    ts = rdtscp(&amp;ui);\n    m *= *((U32 *)&amp;data[0]);\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci[2], ts, te);\n\n    \/* measure div latency *\/\n    ts = rdtscp(&amp;ui);\n    m \/= *((U32 *)&amp;data[0]);\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci[3], ts, te);\n\n    \/* 
measure 2*mul latency *\/\n    ts = rdtscp(&amp;ui);\n    m *= *((U32 *)&amp;data[0]);\n    m *= *((U32 *)&amp;data[0]);\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci2[0], ts, te);\n\n    \/* double div *\/\n    ts = rdtscp(&amp;ui);\n    m \/= *((U32 *)&amp;data[0]);\n    m \/= *((U32 *)&amp;data[0]);\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci2[1], ts, te);\n\n    \/* mul + div *\/\n    ts = rdtscp(&amp;ui);\n    m *= *((U32 *)&amp;data[0]);\n    m \/= *((U32 *)&amp;data[0]);\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci2[2], ts, te);\n\n    \/* measure float mul latency *\/\n    ts = rdtscp(&amp;ui);\n    f = f * m;\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci[4], ts, te);\n\n    \/* measure float div latency *\/\n    while (!m)\n        m = rand();\n    ts = rdtscp(&amp;ui);\n    f = f \/ m;\n    te = rdtscp(&amp;ui);\n    CALC_MIN(csci[5], ts, te);\n}<\/code><\/pre>\n<p>\u7ed3\u679c\uff1a<\/p>\n<table>\n<thead>\n<tr>\n<th><strong>\u6307\u4ee4\u5468\u671f<\/strong><\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<th><\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td>\u6570\u636e\u7c7b\u578b<\/td>\n<td>\u6574\u578b<\/td>\n<td>\u6d6e\u70b9\u6570<\/td>\n<td><\/td>\n<td><\/td>\n<td><\/td>\n<td><\/td>\n<td><\/td>\n<td><\/td>\n<td><\/td>\n<\/tr>\n<tr>\n<td>\u6307\u4ee4\u7ec4\u5408<\/td>\n<td>m*<\/td>\n<td>m\/<\/td>\n<td>m*, m*<\/td>\n<td>m*, n*<\/td>\n<td>m\/, m\/<\/td>\n<td>m\/, n\/<\/td>\n<td>m*, 
m\/<\/td>\n<td>f*<\/td>\n<td>f\/<\/td>\n<\/tr>\n<tr>\n<td>\u6267\u884c\u65f6\u95f4<\/td>\n<td>2<\/td>\n<td>20<\/td>\n<td>4<\/td>\n<td>4<\/td>\n<td>48<\/td>\n<td>26<\/td>\n<td>24<\/td>\n<td>17<\/td>\n<td>26<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u95ee\u9898\uff1am\u53can\u7684\u9664\u6cd5\u8fd0\u7b97\u7684\u8017\u65f6\u53ea\u6bd4m\u7684\u9664\u6cd5\u591a\u4e86\u4e00\u70b9\uff0c\u4f46\u5374\u660e\u663e\u5c11\u4e8em\u7684\u4e24\u6b21\u9664\u6cd5\uff0c\u4e3a\u4ec0\u4e48\uff1f<\/p>\n<h3>\u6ce8\u610f\u4e8b\u9879\uff1a<\/h3>\n<ol>\n<li>\u8003\u8651\u5230CPU\u4e71\u5e8f\u6267\u884c\u7684\u95ee\u9898\uff0crdtsc\u9700\u8981\u914d\u5408cpuid\u6216lfence\u6307\u4ee4\uff0c\u4ee5\u4fdd\u8bc1\u8ba1\u65f6\u8fd9\u4e00\u523b\u6d41\u6c34\u7ebf\u5df2\u6392\u7a7a\uff0c\u5373rdtsc\u8981\u6d4b\u91cf\u7684\u6307\u4ee4\u5df2\u6267\u884c\u5b8c\u3002\u540e\u6765\u7684CPU\u63d0\u4f9b\u4e86rdtscp\u6307\u4ee4\uff0c\u76f8\u5f53\u4e8ecpuid + rdtsc\uff0c\u4f46cpuid\u6307\u4ee4\u672c\u8eab\u7684\u6267\u884c\u5468\u671f\u6709\u6ce2\u52a8\uff0c\u800crdtscp\u6307\u4ee4\u7684\u6267\u884c\u66f4\u7a33\u5b9a\u3002\u4e0d\u8fc7rdtscp\u4e0d\u662f\u6240\u6709\u7684CPU\u90fd\u652f\u6301\uff0c\u4f7f\u7528\u524d\u8981\u901a\u8fc7cpuid\u6307\u4ee4\u67e5\u8be2\u662f\u4e0d\u662f\u652f\u6301\uff1a \u5373CPUID.80000001H:EDX.RDTSCP[bit 27]\u662f\u4e0d\u662f\u4e3a1<\/li>\n<li>\u591a\u6838\u7cfb\u7edf\uff1a\u65b0\u7684CPU\u652f\u6301\u4e86Invariant TSC\u7279\u6027\uff0c\u53ef\u4ee5\u4fdd\u8bc1\u5728\u9ed8\u8ba4\u60c5\u51b5\u4e0b\u5404\u6838\u5fc3\u770b\u5230\u7684TSC\u662f\u4e00\u81f4\u7684\uff0c\u5426\u5219\u6d4b\u91cf\u4ee3\u7801\u6267\u884c\u65f6\u4e0d\u80fd\u8c03\u5ea6\u81f3\u5176\u5b83\u6838\u5fc3\u4e0a\u3002\u53e6\u5916TSC\u662f\u53ef\u4ee5\u901a\u8fc7MSR\u6765\u4fee\u6539\u7684\uff0c\u8fd9\u79cd\u60c5\u51b5\u4e0b\u4e5f\u8981\u6ce8\u610f\uff1a\n<code>Invariant TSC: Software can modify the value of the time-stamp counter (TSC) of a logical processor by using the WRMSR instruction to write to the IA32_TIME_STAMP_COUNTER 
MSR<\/code><\/li>\n<li>CPU\u964d\u9891\u95ee\u9898\uff1a\u7b2c\u4e00\u4ee3TSC\u7684\u5b9e\u73b0\u662fVariant TSC\uff0c\u6ca1\u6709\u8003\u8651\u5230\u964d\u9891\u7684\u95ee\u9898\uff0c\u6545\u5728\u4f4e\u529f\u8017TSC\u8ba1\u6570\u4f1a\u53d8\u6162\uff0c\u751a\u81f3\u505c\u6b62\uff1b\u540e\u6765\u53c8\u6709\u4e86Constant TSC\uff0c\u89e3\u51b3\u4e86\u964d\u9891\u7684\u95ee\u9898\uff0c\u4f46\u5728DEEP-C\u72b6\u6001\u4e0b\u4f9d\u7136\u4f1a\u53d1\u751f\u505c\u6b62\u8ba1\u6570\u7684\u60c5\u51b5\uff0c\u6240\u4ee5\u53c8\u6709\u4e86\u6700\u65b0\u7684Invariant TSC\u7684\u7279\u6027\uff1a\n<code>The time stamp counter in newer processors may support an enhancement, referred to as invariant TSC. Processor\u2019s support for invariant TSC is indicated by CPUID.80000007H:EDX[8]. The invariant TSC will run at a constant rate in all ACPI P-, C-. and T-states. This is the architectural behavior moving forward. On processors with invariant TSC support, the OS may use the TSC for wall clock timer services (instead of ACPI or HPET timers). TSC reads are much more efficient and do not incur the overhead associated with a ring transition or access to a platform resource.<\/code><\/li>\n<li>\u6307\u4ee4\u672c\u8eab\u7684\u65f6\u95f4\u5f00\u9500\n<code>Pentium Gold G5500T: 31 cycles Core i7-7820HQ: 25 cycles<\/code><\/li>\n<li>\u6743\u9650\u95ee\u9898\uff08\u6b64\u6307\u4ee4\u53ef\u7528\u4e8e\u65f6\u5e8f\u653b\u51fb\uff0c\u5982Meltdown\u53caSpectre\uff09:\n<code>CR4.TSD: Time Stamp Disable (bit 2 of CR4) \u2014 Restricts the execution of the RDTSC instruction to procedures running at privilege level 0 when set; allows RDTSC instruction to be executed at any privilege level when clear. 
This bit also applies to the RDTSCP instruction if supported (if CPUID.80000001H:EDX[27] = 1).<\/code><\/li>\n<li>\u8ba1\u6570\u5668\u6ea2\u51fa\u53ef\u80fd\uff1a\u8ba1\u6570\u5668\u672c\u8eab\u662f64\u4f4d\u7684\uff0c\u5373\u4f7f\u662f\u4e3b\u98914G\u7684CPU\uff0c\u4e5f\u8981100\u591a\u5e74\u624d\u4f1a\u6ea2\u51fa\uff0c\u5bf9\u4e8e\u6211\u4eec\u7684\u6d4b\u91cf\u6765\u8bf4\u53ef\u4ee5\u4e0d\u7528\u8003\u8651<\/li>\n<li>\u65f6\u5e8f\u6d4b\u91cf\u5bb9\u6613\u88ab\u5e72\u6270\uff08\u7ebf\u7a0b\u8c03\u5ea6\u3001\u62a2\u5360\u3001\u7cfb\u7edf\u4e2d\u65ad\u3001\u865a\u62df\u5316\u7b49\uff09\uff0c\u8981\u6c42\u6d4b\u91cf\u7684\u6307\u4ee4\u5e8f\u5217\u5c3d\u91cf\u77ed\uff0c\u5e76\u4e14\u9700\u8981\u8fdb\u884c\u591a\u6b21\u6d4b\u91cf<\/li>\n<\/ol><\/div>","protected":false},"excerpt":{"rendered":"<p>\u81eaPentium\u5f00\u59cbx86 CPU\u5747\u5f15\u5165TSC\u4e86\uff0c\u53ef\u63d0\u4f9b\u6307\u4ee4\u7ea7\u6267\u884c\u65f6\u95f4\u5ea6\u91cf\u768464\u4f4d\u65f6\u95f4\u6233\u8ba1\u6570\u5bc4\u5b58\u5668\uff0c\u968f\u7740CPU 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"ngg_post_thumbnail":0,"footnotes":""},"categories":[9],"tags":[622,624,625,623,621,394],"views":833,"_links":{"self":[{"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=\/wp\/v2\/posts\/1682"}],"collection":[{"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1682"}],"version-history":[{"count":1,"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=\/wp\/v2\/posts\/1682\/revisions"}],"predecessor-version":[{"id":1683,"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=\/wp\/v2\/posts\/1682\/revisions\/1683"}],"wp:attachment":[{"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1682"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1682"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.dynox.cn\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1682"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}