經典場景示例
更新時間 2023-12-20 18:53:05
最近更新時間: 2023-12-20 18:53:05
分享文章
本頁面介紹云數據庫ClickHouse單副本實例分布式數據操作的經典場景示例。
本節介紹云數據庫ClickHouse的經典場景示例,示例數據集由一個表組成,這個表包含有關Yandex.Metrica的匹配(hits_v1)的匿名數據。數據集可以作為壓縮的tsv.xz文件下載或作為準備好的分區下載。 下載TSV文件,并且將數據導入數據庫。 如果直接下載tsv數據較慢,可以先使用其他工具或源下載數據文件,再使用xz -d hits_v1.tsv.xz 命令解壓 ,然后再導入。
單節點示例
下載數據包并解壓
curl //clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads= `nproc` > hits_v1.tsv
創建數據庫
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
創建表
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192";
導入數據
cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000
統計數據量
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
兩節點單副本示例
命令行客戶端連接集群任意節點,連接命令如下:
clickhouse-client -h A.A.A.A --port 8123 --user username --password yourPassword
創建數據庫和表
CREATE DATABASE IF NOT EXISTS datasets ON CLUSTER 'inst_f5040618_2shards_1replicas'
創建本地表 hits_v1 建表語句添加ON CLUSTER 關鍵字后,在任意節點上執行建表語句都會為集群所有節點創建該表。
CREATE TABLE datasets.hits_v1 ON CLUSTER 'inst_f5040618_2shards_1replicas' ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/datasets/hits_v1','{replica}') PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192;
創建分布式表
CREATE TABLE IF NOT EXISTS datasets.hits_all ON CLUSTER 'inst_f5040618_2shards_1replicas'AS datasets.hits_v1 ENGINE = Distributed( 'inst_f5040618_2shards_1replicas',datasets,hits_v1,rand());
導入數據
cat hits_v1.tsv | clickhouse-client -h A.A.A.A --port 8123 --user username --password yourPassword --query "INSERT INTO datasets.hits_all FORMAT TSV" --max_insert_block_size=100000;
統計數據量
localhost :) select count(*) from datasets.hits_all;
SELECT count(*)
FROM datasets.hits_all
┌─count()─┐
│ 8873898 │
└─────────┘
1 rows in set. Elapsed: 0.006 sec.
localhost :) select count(*) from datasets.hits_v1;
SELECT count(*)
FROM datasets.hits_v1
┌─count()─┐
│ 4435304 │
└─────────┘
1 rows in set. Elapsed: 0.002 sec.
條件查詢示例
localhost :) SELECT count() FROM datasets.hits_all WHERE CounterID < 10000;
SELECT count()
FROM datasets.hits_all
WHERE CounterID < 10000
┌─count()─┐
│ 32670 │
└─────────┘
1 rows in set. Elapsed: 0.147 sec. Processed 73.73 thousand rows, 294.91 KB (499.99 thousand rows/s., 2.00 MB/s.)
localhost :) SELECT count() FROM datasets.hits_all WHERE CounterID > 10000;
SELECT count()
FROM datasets.hits_all
WHERE CounterID > 10000
┌─count()─┐
│ 8841228 │
└─────────┘
1 rows in set. Elapsed: 0.083 sec. Processed 8.87 million rows, 35.46 MB (106.47 million rows/s., 425.89 MB/s.)
從上面的操作結果可以看出云數據庫ClickHouse在處理百萬至千萬數量級的條件查詢操作處理速度非常快,能夠滿足實時查詢的需求。