Commit 9828621e authored by Marcel Huber
Browse files

updated MongoDB sharding solutions

- chunk size adjustment added (thanks to @ckirchme)
parent c812b90d
Pipeline #40365 passed with stages
in 2 minutes and 19 seconds
......@@ -128,9 +128,10 @@ Wählen Sie einen passenden Shardkey und begründen Sie Ihre Wahl.
=====
.Ausgewählter *Shardkey* mit Begründung
ifdef::exercise_solution[]
`_id`:: Weil...
`name`:: Weil...
`state`:: Weil...
`_id`:: Numerische Werte eignen sich gut für eine gleichmässige Verteilung.
`name`:: Eignet sich gut für Datasets mit grosser Variation wie in diesem Fall.
Kommen jedoch in einem Sprachraum gewisse Wortanfänge häufiger vor, verteilen sich die Chunks nicht so gut.
`state`:: Sieht nach einer Gruppierung aus, welche nicht zwingend gleichmässig verteilt sein muss.
endif::exercise_solution[]
ifndef::exercise_solution[]
{nbsp}
......@@ -337,6 +338,44 @@ mongos> sh.help()
+
[example,title=""]
=====
.Aktuellen Shard Status anzeigen
ifdef::exercise_solution[]
[source%autofit,javascript]
----
sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("5ec2a5ebb4a8b473ab0cd507")
}
shards:
active mongoses:
autosplit:
Currently enabled: yes
balancer:
Currently enabled: yes
Currently running: no
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
No recent migrations
databases:
{ "_id" : "config", "primary" : "config", "partitioned" : true }
----
endif::exercise_solution[]
ifndef::exercise_solution[]
{nbsp}
{nbsp}
{nbsp}
endif::exercise_solution[]
=====
+
[example,title=""]
=====
.Shard Instanzen zuweisen
ifdef::exercise_solution[]
[source%autofit,javascript]
......@@ -351,14 +390,34 @@ sh.addShard('shard2:31002')
...
sh.addShard('shard3:31003')
...
ifdef::blubby[]
sh.addShard('dataset/shard1:31001,shard2:31002,shard3:31003')
{
"shardAdded" : "dataset",
"ok" : 1,
...
}
endif::blubby[]
----
.Anzeige des aktuellen Shard Status
[source%autofit,javascript]
----
sh.status()
--- Sharding Status ---
sharding version: {
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("5ec2a5ebb4a8b473ab0cd507")
}
shards:
{ "_id" : "shard0000", "host" : "shard1:31001", "state" : 1 }
{ "_id" : "shard0001", "host" : "shard2:31002", "state" : 1 }
{ "_id" : "shard0002", "host" : "shard3:31003", "state" : 1 }
active mongoses:
"4.2.6" : 1
autosplit:
Currently enabled: yes
balancer:
Currently enabled: yes
Currently running: no
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
No recent migrations
databases:
{ "_id" : "config", "primary" : "config", "partitioned" : true }
----
endif::exercise_solution[]
ifndef::exercise_solution[]
......@@ -417,6 +476,47 @@ ifndef::exercise_solution[]
{nbsp}
{nbsp}
endif::exercise_solution[]
=====
.. Anpassen der Chunkgrösse
+
Damit die Daten über die Shard Instanzen wie gewünscht verteilt werden, muss die voreingestellte Chunkgrösse verkleinert werden.
_The default chunk size for a sharded cluster is 64 megabytes_ (https://docs.mongodb.com/manual/core/sharding-data-partitioning/#chunk-size).
+
[example,title=""]
=====
.Einstieg über den Shardserver (`mongos`)
ifdef::exercise_solution[]
[source,bash]
----
docker-compose --file Helpers/docker-compose.yml --project-name dataeng \
--project-directory . run mongoterm mongo --host shardserver:32000
----
endif::exercise_solution[]
ifndef::exercise_solution[]
{nbsp}
{nbsp}
endif::exercise_solution[]
=====
+
[example,title=""]
=====
.Chunkgrösse anpassen
ifdef::exercise_solution[]
[source%autofit,javascript]
----
use config
db.settings.save( { _id:"chunksize", value: 4 } )
WriteResult({ "nMatched" : 0, "nUpserted" : 1, "nModified" : 0, "_id" : "chunksize" })
----
endif::exercise_solution[]
ifndef::exercise_solution[]
{nbsp}
{nbsp}
endif::exercise_solution[]
=====
+
......@@ -432,7 +532,7 @@ sh.status()
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("5ea75de563f1b4284efd972e")
"clusterId" : ObjectId("5ec2a5ebb4a8b473ab0cd507")
}
shards:
{ "_id" : "shard0000", "host" : "shard1:31001", "state" : 1 }
......@@ -450,14 +550,21 @@ sh.status()
No recent migrations
databases:
{ "_id" : "config", "primary" : "config", "partitioned" : true }
{ "_id" : "test", "primary" : "shard0001", "partitioned" : true, "version" : { "uuid" : UUID("69112054-c1f7-401e-8252-b40286aca2f2"), "lastMod" : 1 } }
config.system.sessions
shard key: { "_id" : 1 }
unique: false
balancing: true
chunks:
shard0000 1
{ "_id" : { "$minKey" : 1 } } -->> { "_id" : { "$maxKey" : 1 } } on : shard0000 Timestamp(1, 0)
{ "_id" : "test", "primary" : "shard0000", "partitioned" : true, "version" : { "uuid" : UUID("0c2c424d-6036-44ee-989f-82d990fd0fcd"), "lastMod" : 1 } }
test.cities
shard key: { "name" : 1 }
unique: false
balancing: true
chunks:
shard0001 1
{ "name" : { "$minKey" : 1 } } -->> { "name" : { "$maxKey" : 1 } } on : shard0001 Timestamp(1, 0)
shard0000 1
{ "name" : { "$minKey" : 1 } } -->> { "name" : { "$maxKey" : 1 } } on : shard0000 Timestamp(1, 0)
----
endif::exercise_solution[]
ifndef::exercise_solution[]
......@@ -483,8 +590,8 @@ docker-compose --file Helpers/docker-compose.yml --project-name dataeng \
.Statusmeldung nach erfolgreichem Import
[source,console]
----
2020-04-27T22:38:55.076+0000 connected to: shardserver:32000
2020-04-27T22:38:56.846+0000 imported 114079 documents
2020-05-18T15:23:23.720+0000 connected to: mongodb://shardserver:32000/
2020-05-18T15:23:26.446+0000 114079 document(s) imported successfully. 0 document(s) failed to import.
----
endif::exercise_solution[]
ifndef::exercise_solution[]
......@@ -528,14 +635,14 @@ db.printShardingStatus(true)
"_id" : 1,
"minCompatibleVersion" : 5,
"currentVersion" : 6,
"clusterId" : ObjectId("5ea75de563f1b4284efd972e")
"clusterId" : ObjectId("5ec2a5ebb4a8b473ab0cd507")
}
shards:
{ "_id" : "shard0000", "host" : "shard1:31001", "state" : 1 }
{ "_id" : "shard0001", "host" : "shard2:31002", "state" : 1 }
{ "_id" : "shard0002", "host" : "shard3:31003", "state" : 1 }
active mongoses:
{ "_id" : "shardserver:32000", "advisoryHostFQDNs" : [ ], "mongoVersion" : "4.2.6", "ping" : ISODate("2020-04-27T22:42:56.090Z"), "up" : NumberLong(490), "waiting" : true }
{ "_id" : "shardserver:32000", "advisoryHostFQDNs" : [ ], "mongoVersion" : "4.2.6", "ping" : ISODate("2020-05-18T15:24:19.175Z"), "up" : NumberLong(651), "waiting" : true }
autosplit:
Currently enabled: yes
balancer:
......@@ -543,7 +650,7 @@ db.printShardingStatus(true)
Currently running: no
Failed balancer rounds in last 5 attempts: 0
Migration Results for the last 24 hours:
No recent migrations
12 : Success
databases:
{ "_id" : "config", "primary" : "config", "partitioned" : true }
config.system.sessions
......@@ -553,17 +660,110 @@ db.printShardingStatus(true)
chunks:
shard0000 1
{ "_id" : { "$minKey" : 1 } } -->> { "_id" : { "$maxKey" : 1 } } on : shard0000 Timestamp(1, 0)
{ "_id" : "test", "primary" : "shard0001", "partitioned" : true, "version" : { "uuid" : UUID("69112054-c1f7-401e-8252-b40286aca2f2"), "lastMod" : 1 } }
{ "_id" : "test", "primary" : "shard0000", "partitioned" : true, "version" : { "uuid" : UUID("0c2c424d-6036-44ee-989f-82d990fd0fcd"), "lastMod" : 1 } }
test.cities
shard key: { "name" : 1 }
unique: false
balancing: true
chunks:
shard0001 1
{ "name" : { "$minKey" : 1 } } -->> { "name" : { "$maxKey" : 1 } } on : shard0001 Timestamp(1, 0)
shard0000 7
shard0001 6
shard0002 6
{ "name" : { "$minKey" : 1 } } -->> { "name" : "Aadorf" } on : shard0002 Timestamp(3, 0)
{ "name" : "Aadorf" } -->> { "name" : "Boos" } on : shard0001 Timestamp(12, 0)
{ "name" : "Boos" } -->> { "name" : "Croft" } on : shard0001 Timestamp(13, 0)
{ "name" : "Croft" } -->> { "name" : "Dolní Kounice" } on : shard0002 Timestamp(14, 0)
{ "name" : "Dolní Kounice" } -->> { "name" : "Glemsford" } on : shard0002 Timestamp(15, 0)
{ "name" : "Glemsford" } -->> { "name" : "Jeju City" } on : shard0001 Timestamp(16, 0)
{ "name" : "Jeju City" } -->> { "name" : "Jungingen" } on : shard0002 Timestamp(17, 0)
{ "name" : "Jungingen" } -->> { "name" : "Khakhea" } on : shard0001 Timestamp(18, 0)
{ "name" : "Khakhea" } -->> { "name" : "Letschin" } on : shard0001 Timestamp(19, 0)
{ "name" : "Letschin" } -->> { "name" : "Mermuth" } on : shard0002 Timestamp(20, 0)
{ "name" : "Mermuth" } -->> { "name" : "Mittelherwigsdorf" } on : shard0001 Timestamp(21, 0)
{ "name" : "Mittelherwigsdorf" } -->> { "name" : "Oberaula" } on : shard0002 Timestamp(22, 0)
{ "name" : "Oberaula" } -->> { "name" : "Pontian" } on : shard0000 Timestamp(19, 1)
{ "name" : "Pontian" } -->> { "name" : "Pruna" } on : shard0000 Timestamp(20, 1)
{ "name" : "Pruna" } -->> { "name" : "Qaraçala" } on : shard0000 Timestamp(22, 1)
{ "name" : "Qaraçala" } -->> { "name" : "Saponara" } on : shard0000 Timestamp(15, 1)
{ "name" : "Saponara" } -->> { "name" : "Tres Ríos" } on : shard0000 Timestamp(16, 1)
{ "name" : "Tres Ríos" } -->> { "name" : "Velké Bílovice" } on : shard0000 Timestamp(17, 1)
{ "name" : "Velké Bílovice" } -->> { "name" : { "$maxKey" : 1 } } on : shard0000 Timestamp(21, 1)
----
.Detaillierten Datenbankstatus anzeigen
[source,javascript]
----
db.stats()
...
{
"raw" : {
"shard1:31001" : {
"db" : "test",
"collections" : 1,
"views" : 0,
"objects" : 114052,
"avgObjSize" : 348.63631501420406,
"dataSize" : 39762669,
"storageSize" : 15831040,
"numExtents" : 0,
"indexes" : 2,
"indexSize" : 2912256,
"scaleFactor" : 1,
"fsUsedSize" : 149291008,
"fsTotalSize" : 106859855872,
"ok" : 1
},
"shard2:31002" : {
"db" : "test",
"collections" : 1,
"views" : 0,
"objects" : 42981,
"avgObjSize" : 350.9689164979875,
"dataSize" : 15084995,
"storageSize" : 6033408,
"numExtents" : 0,
"indexes" : 2,
"indexSize" : 1097728,
"scaleFactor" : 1,
"fsUsedSize" : 149291008,
"fsTotalSize" : 106859855872,
"ok" : 1
},
"shard3:31003" : {
"db" : "test",
"collections" : 1,
"views" : 0,
"objects" : 26981,
"avgObjSize" : 347.4246692116675,
"dataSize" : 9373865,
"storageSize" : 3645440,
"numExtents" : 0,
"indexes" : 2,
"indexSize" : 704512,
"scaleFactor" : 1,
"fsUsedSize" : 149291008,
"fsTotalSize" : 106859855872,
"ok" : 1
}
},
"objects" : 184014,
"avgObjSize" : 348.32052452530786,
"dataSize" : 64221529,
"storageSize" : 25509888,
"numExtents" : 0,
"indexes" : 6,
"indexSize" : 4714496,
"scaleFactor" : 1,
"fileSize" : 0,
"ok" : 1,
"operationTime" : Timestamp(1589815489, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1589815489, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
----
endif::exercise_solution[]
ifndef::exercise_solution[]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment