storm配置:如何解决worker进程内存过小的问题
问题导读
1.如何设置storm内存?
2.如果没有配置文件的情况下,该如何配置一些参数?
3.通过哪个参数可以配置内存?
Storm中真正干活的是各个worker,而worker由supervisor负责启动。在topology启动过程中我们会看到如下的启动日志:
这就是启动一个worker进程,也就是一个JVM进程。
默认情况下,Storm启动worker进程时,JVM的最大内存是768M。
但我在使用过程中,由于会在Bolt中加载大量数据,768M内存无法满足需求,会导致内存溢出程序崩溃。
经过研究发现,可以通过在Storm的配置文件storm.yaml中设置worker的启动参数:
worker.childopts: "-Xmx2048m"
该参数会在启动时传递给JVM,然后就可以在worker中使用2048m内存了。
目前好像Storm还没有配置文件的详细说明,比如可以配置哪些参数,怎么配置?
大家可以先参考Storm源代码中的Config.java:
1 package backtype.storm; 2 3 import backtype.storm.ConfigValidation; 4 import backtype.storm.serialization.IKryoDecorator; 5 import backtype.storm.serialization.IKryoFactory; 6 import com.esotericsoftware.kryo.Serializer; 7 import java.util.ArrayList; 8 import java.util.HashMap; 9 import java.util.List; 10 import java.util.Map; 11 12 /** 13 * Topology configs are specified as a plain old map. This class provides a 14 * convenient way to create a topology config map by providing setter methods for 15 * all the configs that can be set. It also makes it easier to do things like add 16 * serializations. 17 * 18 * <p>This class also provides constants for all the configurations possible on 19 * a Storm cluster and Storm topology. Each constant is paired with a schema 20 * that defines the validity criterion of the corresponding field. Default 21 * values for these configs can be found in defaults.yaml.</p> 22 * 23 * <p>Note that you may put other configurations in any of the configs. Storm 24 * will ignore anything it doesn't recognize, but your topologies are free to make 25 * use of them by reading them in the prepare method of Bolts or the open method of 26 * Spouts.</p> 27 */ 28 public class Config extends HashMap<String, Object> { 29 /** 30 * The transporter for communication among Storm tasks 31 */ 32 public static final String STORM_MESSAGING_TRANSPORT = "storm.messaging.transport"; 33 public static final Object STORM_MESSAGING_TRANSPORT_SCHEMA = String.class; 34 35 /** 36 * Netty based messaging: The buffer size for send/recv buffer 37 */ 38 public static final String STORM_MESSAGING_NETTY_BUFFER_SIZE = "storm.messaging.netty.buffer_size"; 39 public static final Object STORM_MESSAGING_NETTY_BUFFER_SIZE_SCHEMA = Number.class; 40 41 /** 42 * Netty based messaging: The max # of retries that a peer will perform when a remote is not accessible 43 */ 44 public static final String STORM_MESSAGING_NETTY_MAX_RETRIES = "storm.messaging.netty.max_retries"; 45 public 
static final Object STORM_MESSAGING_NETTY_MAX_RETRIES_SCHEMA = Number.class; 46 47 /** 48 * Netty based messaging: The min # of milliseconds that a peer will wait. 49 */ 50 public static final String STORM_MESSAGING_NETTY_MIN_SLEEP_MS = "storm.messaging.netty.min_wait_ms"; 51 public static final Object STORM_MESSAGING_NETTY_MIN_SLEEP_MS_SCHEMA = Number.class; 52 53 /** 54 * Netty based messaging: The max # of milliseconds that a peer will wait. 55 */ 56 public static final String STORM_MESSAGING_NETTY_MAX_SLEEP_MS = "storm.messaging.netty.max_wait_ms"; 57 public static final Object STORM_MESSAGING_NETTY_MAX_SLEEP_MS_SCHEMA = Number.class; 58 59 /** 60 * Netty based messaging: The # of worker threads for the server. 61 */ 62 public static final String STORM_MESSAGING_NETTY_SERVER_WORKER_THREADS = "storm.messaging.netty.server_worker_threads"; 63 public static final Object STORM_MESSAGING_NETTY_SERVER_WORKER_THREADS_SCHEMA = Number.class; 64 65 /** 66 * Netty based messaging: The # of worker threads for the client. 67 */ 68 public static final String STORM_MESSAGING_NETTY_CLIENT_WORKER_THREADS = "storm.messaging.netty.client_worker_threads"; 69 public static final Object STORM_MESSAGING_NETTY_CLIENT_WORKER_THREADS_SCHEMA = Number.class; 70 71 /** 72 * A list of hosts of ZooKeeper servers used to manage the cluster. 73 */ 74 public static final String STORM_ZOOKEEPER_SERVERS = "storm.zookeeper.servers"; 75 public static final Object STORM_ZOOKEEPER_SERVERS_SCHEMA = ConfigValidation.StringsValidator; 76 77 /** 78 * The port Storm will use to connect to each of the ZooKeeper servers. 79 */ 80 public static final String STORM_ZOOKEEPER_PORT = "storm.zookeeper.port"; 81 public static final Object STORM_ZOOKEEPER_PORT_SCHEMA = Number.class; 82 83 /** 84 * A directory on the local filesystem used by Storm for any local 85 * filesystem usage it needs. The directory must exist and the Storm daemons must 86 * have permission to read/write from this location. 
87 */ 88 public static final String STORM_LOCAL_DIR = "storm.local.dir"; 89 public static final Object STORM_LOCAL_DIR_SCHEMA = String.class; 90 91 /** 92 * A global task scheduler used to assign topologies's tasks to supervisors' wokers. 93 * 94 * If this is not set, a default system scheduler will be used. 95 */ 96 public static final String STORM_SCHEDULER = "storm.scheduler"; 97 public static final Object STORM_SCHEDULER_SCHEMA = String.class; 98 99 /** 100 * The mode this Storm cluster is running in. Either "distributed" or "local". 101 */ 102 public static final String STORM_CLUSTER_MODE = "storm.cluster.mode"; 103 public static final Object STORM_CLUSTER_MODE_SCHEMA = String.class; 104 105 /** 106 * The hostname the supervisors/workers should report to nimbus. If unset, Storm will 107 * get the hostname to report by calling <code>InetAddress.getLocalHost().getCanonicalHostName()</code>. 108 * 109 * You should set this config when you dont have a DNS which supervisors/workers 110 * can utilize to find each other based on hostname got from calls to 111 * <code>InetAddress.getLocalHost().getCanonicalHostName()</code>. 112 */ 113 public static final String STORM_LOCAL_HOSTNAME = "storm.local.hostname"; 114 public static final Object STORM_LOCAL_HOSTNAME_SCHEMA = String.class; 115 116 /** 117 * The transport plug-in for Thrift client/server communication 118 */ 119 public static final String STORM_THRIFT_TRANSPORT_PLUGIN = "storm.thrift.transport"; 120 public static final Object STORM_THRIFT_TRANSPORT_PLUGIN_SCHEMA = String.class; 121 122 /** 123 * The serializer class for ListDelegate (tuple payload). 124 * The default serializer will be ListDelegateSerializer 125 */ 126 public static final String TOPOLOGY_TUPLE_SERIALIZER = "topology.tuple.serializer"; 127 public static final Object TOPOLOGY_TUPLE_SERIALIZER_SCHEMA = String.class; 128 129 /** 130 * Whether or not to use ZeroMQ for messaging in local mode. 
If this is set 131 * to false, then Storm will use a pure-Java messaging system. The purpose 132 * of this flag is to make it easy to run Storm in local mode by eliminating 133 * the need for native dependencies, which can be difficult to install. 134 * 135 * Defaults to false. 136 */ 137 public static final String STORM_LOCAL_MODE_ZMQ = "storm.local.mode.zmq"; 138 public static final Object STORM_LOCAL_MODE_ZMQ_SCHEMA = Boolean.class; 139 140 /** 141 * The root location at which Storm stores data in ZooKeeper. 142 */ 143 public static final String STORM_ZOOKEEPER_ROOT = "storm.zookeeper.root"; 144 public static final Object STORM_ZOOKEEPER_ROOT_SCHEMA = String.class; 145 146 /** 147 * The session timeout for clients to ZooKeeper. 148 */ 149 public static final String STORM_ZOOKEEPER_SESSION_TIMEOUT = "storm.zookeeper.session.timeout"; 150 public static final Object STORM_ZOOKEEPER_SESSION_TIMEOUT_SCHEMA = Number.class; 151 152 /** 153 * The connection timeout for clients to ZooKeeper. 154 */ 155 public static final String STORM_ZOOKEEPER_CONNECTION_TIMEOUT = "storm.zookeeper.connection.timeout"; 156 public static final Object STORM_ZOOKEEPER_CONNECTION_TIMEOUT_SCHEMA = Number.class; 157 158 /** 159 * The number of times to retry a Zookeeper operation. 160 */ 161 public static final String STORM_ZOOKEEPER_RETRY_TIMES="storm.zookeeper.retry.times"; 162 public static final Object STORM_ZOOKEEPER_RETRY_TIMES_SCHEMA = Number.class; 163 164 /** 165 * The interval between retries of a Zookeeper operation. 166 */ 167 public static final String STORM_ZOOKEEPER_RETRY_INTERVAL="storm.zookeeper.retry.interval"; 168 public static final Object STORM_ZOOKEEPER_RETRY_INTERVAL_SCHEMA = Number.class; 169 170 /** 171 * The ceiling of the interval between retries of a Zookeeper operation. 
172 */ 173 public static final String STORM_ZOOKEEPER_RETRY_INTERVAL_CEILING="storm.zookeeper.retry.intervalceiling.millis"; 174 public static final Object STORM_ZOOKEEPER_RETRY_INTERVAL_CEILING_SCHEMA = Number.class; 175 176 /** 177 * The Zookeeper authentication scheme to use, e.g. "digest". Defaults to no authentication. 178 */ 179 public static final String STORM_ZOOKEEPER_AUTH_SCHEME="storm.zookeeper.auth.scheme"; 180 public static final Object STORM_ZOOKEEPER_AUTH_SCHEME_SCHEMA = String.class; 181 182 /** 183 * A string representing the payload for Zookeeper authentication. It gets serialized using UTF-8 encoding during authentication. 184 */ 185 public static final String STORM_ZOOKEEPER_AUTH_PAYLOAD="storm.zookeeper.auth.payload"; 186 public static final Object STORM_ZOOKEEPER_AUTH_PAYLOAD_SCHEMA = String.class; 187 188 /** 189 * The id assigned to a running topology. The id is the storm name with a unique nonce appended. 190 */ 191 public static final String STORM_ID = "storm.id"; 192 public static final Object STORM_ID_SCHEMA = String.class; 193 194 /** 195 * The host that the master server is running on. 196 */ 197 public static final String NIMBUS_HOST = "nimbus.host"; 198 public static final Object NIMBUS_HOST_SCHEMA = String.class; 199 200 /** 201 * Which port the Thrift interface of Nimbus should run on. Clients should 202 * connect to this port to upload jars and submit topologies. 203 */ 204 public static final String NIMBUS_THRIFT_PORT = "nimbus.thrift.port"; 205 public static final Object NIMBUS_THRIFT_PORT_SCHEMA = Number.class; 206 207 208 /** 209 * This parameter is used by the storm-deploy project to configure the 210 * jvm options for the nimbus daemon. 
211 */ 212 public static final String NIMBUS_CHILDOPTS = "nimbus.childopts"; 213 public static final Object NIMBUS_CHILDOPTS_SCHEMA = String.class; 214 215 216 /** 217 * How long without heartbeating a task can go before nimbus will consider the 218 * task dead and reassign it to another location. 219 */ 220 public static final String NIMBUS_TASK_TIMEOUT_SECS = "nimbus.task.timeout.secs"; 221 public static final Object NIMBUS_TASK_TIMEOUT_SECS_SCHEMA = Number.class; 222 223 224 /** 225 * How often nimbus should wake up to check heartbeats and do reassignments. Note 226 * that if a machine ever goes down Nimbus will immediately wake up and take action. 227 * This parameter is for checking for failures when there's no explicit event like that 228 * occuring. 229 */ 230 public static final String NIMBUS_MONITOR_FREQ_SECS = "nimbus.monitor.freq.secs"; 231 public static final Object NIMBUS_MONITOR_FREQ_SECS_SCHEMA = Number.class; 232 233 /** 234 * How often nimbus should wake the cleanup thread to clean the inbox. 235 * @see NIMBUS_INBOX_JAR_EXPIRATION_SECS 236 */ 237 public static final String NIMBUS_CLEANUP_INBOX_FREQ_SECS = "nimbus.cleanup.inbox.freq.secs"; 238 public static final Object NIMBUS_CLEANUP_INBOX_FREQ_SECS_SCHEMA = Number.class; 239 240 /** 241 * The length of time a jar file lives in the inbox before being deleted by the cleanup thread. 242 * 243 * Probably keep this value greater than or equal to NIMBUS_CLEANUP_INBOX_JAR_EXPIRATION_SECS. 244 * Note that the time it takes to delete an inbox jar file is going to be somewhat more than 245 * NIMBUS_CLEANUP_INBOX_JAR_EXPIRATION_SECS (depending on how often NIMBUS_CLEANUP_FREQ_SECS 246 * is set to). 
247 * @see NIMBUS_CLEANUP_FREQ_SECS 248 */ 249 public static final String NIMBUS_INBOX_JAR_EXPIRATION_SECS = "nimbus.inbox.jar.expiration.secs"; 250 public static final Object NIMBUS_INBOX_JAR_EXPIRATION_SECS_SCHEMA = Number.class; 251 252 /** 253 * How long before a supervisor can go without heartbeating before nimbus considers it dead 254 * and stops assigning new work to it. 255 */ 256 public static final String NIMBUS_SUPERVISOR_TIMEOUT_SECS = "nimbus.supervisor.timeout.secs"; 257 public static final Object NIMBUS_SUPERVISOR_TIMEOUT_SECS_SCHEMA = Number.class; 258 259 /** 260 * A special timeout used when a task is initially launched. During launch, this is the timeout 261 * used until the first heartbeat, overriding nimbus.task.timeout.secs. 262 * 263 * <p>A separate timeout exists for launch because there can be quite a bit of overhead 264 * to launching new JVM's and configuring them.</p> 265 */ 266 public static final String NIMBUS_TASK_LAUNCH_SECS = "nimbus.task.launch.secs"; 267 public static final Object NIMBUS_TASK_LAUNCH_SECS_SCHEMA = Number.class; 268 269 /** 270 * Whether or not nimbus should reassign tasks if it detects that a task goes down. 271 * Defaults to true, and it's not recommended to change this value. 272 */ 273 public static final String NIMBUS_REASSIGN = "nimbus.reassign"; 274 public static final Object NIMBUS_REASSIGN_SCHEMA = Boolean.class; 275 276 /** 277 * During upload/download with the master, how long an upload or download connection is idle 278 * before nimbus considers it dead and drops the connection. 279 */ 280 public static final String NIMBUS_FILE_COPY_EXPIRATION_SECS = "nimbus.file.copy.expiration.secs"; 281 public static final Object NIMBUS_FILE_COPY_EXPIRATION_SECS_SCHEMA = Number.class; 282 283 /** 284 * A custom class that implements ITopologyValidator that is run whenever a 285 * topology is submitted. Can be used to provide business-specific logic for 286 * whether topologies are allowed to run or not. 
287 */ 288 public static final String NIMBUS_TOPOLOGY_VALIDATOR = "nimbus.topology.validator"; 289 public static final Object NIMBUS_TOPOLOGY_VALIDATOR_SCHEMA = String.class; 290 291 /** 292 * Class name for authorization plugin for Nimbus 293 */ 294 public static final String NIMBUS_AUTHORIZER = "nimbus.authorizer"; 295 public static final Object NIMBUS_AUTHORIZER_SCHEMA = String.class; 296 297 /** 298 * Storm UI binds to this port. 299 */ 300 public static final String UI_PORT = "ui.port"; 301 public static final Object UI_PORT_SCHEMA = Number.class; 302 303 /** 304 * HTTP UI port for log viewer 305 */ 306 public static final String LOGVIEWER_PORT = "logviewer.port"; 307 public static final Object LOGVIEWER_PORT_SCHEMA = Number.class; 308 309 /** 310 * Childopts for log viewer java process. 311 */ 312 public static final String LOGVIEWER_CHILDOPTS = "logviewer.childopts"; 313 public static final Object LOGVIEWER_CHILDOPTS_SCHEMA = String.class; 314 315 /** 316 * Appender name used by log viewer to determine log directory. 317 */ 318 public static final String LOGVIEWER_APPENDER_NAME = "logviewer.appender.name"; 319 public static final Object LOGVIEWER_APPENDER_NAME_SCHEMA = String.class; 320 321 /** 322 * Childopts for Storm UI Java process. 323 */ 324 public static final String UI_CHILDOPTS = "ui.childopts"; 325 public static final Object UI_CHILDOPTS_SCHEMA = String.class; 326 327 /** 328 * List of DRPC servers so that the DRPCSpout knows who to talk to. 329 */ 330 public static final String DRPC_SERVERS = "drpc.servers"; 331 public static final Object DRPC_SERVERS_SCHEMA = ConfigValidation.StringsValidator; 332 333 /** 334 * This port is used by Storm DRPC for receiving DPRC requests from clients. 
335 */ 336 public static final String DRPC_PORT = "drpc.port"; 337 public static final Object DRPC_PORT_SCHEMA = Number.class; 338 339 /** 340 * DRPC thrift server worker threads 341 */ 342 public static final String DRPC_WORKER_THREADS = "drpc.worker.threads"; 343 public static final Object DRPC_WORKER_THREADS_SCHEMA = Number.class; 344 345 /** 346 * DRPC thrift server queue size 347 */ 348 public static final String DRPC_QUEUE_SIZE = "drpc.queue.size"; 349 public static final Object DRPC_QUEUE_SIZE_SCHEMA = Number.class; 350 351 /** 352 * This port on Storm DRPC is used by DRPC topologies to receive function invocations and send results back. 353 */ 354 public static final String DRPC_INVOCATIONS_PORT = "drpc.invocations.port"; 355 public static final Object DRPC_INVOCATIONS_PORT_SCHEMA = Number.class; 356 357 /** 358 * The timeout on DRPC requests within the DRPC server. Defaults to 10 minutes. Note that requests can also 359 * timeout based on the socket timeout on the DRPC client, and separately based on the topology message 360 * timeout for the topology implementing the DRPC function. 361 */ 362 public static final String DRPC_REQUEST_TIMEOUT_SECS = "drpc.request.timeout.secs"; 363 public static final Object DRPC_REQUEST_TIMEOUT_SECS_SCHEMA = Number.class; 364 365 /** 366 * Childopts for Storm DRPC Java process. 367 */ 368 public static final String DRPC_CHILDOPTS = "drpc.childopts"; 369 public static final Object DRPC_CHILDOPTS_SCHEMA = String.class; 370 371 /** 372 * the metadata configed on the supervisor 373 */ 374 public static final String SUPERVISOR_SCHEDULER_META = "supervisor.scheduler.meta"; 375 public static final Object SUPERVISOR_SCHEDULER_META_SCHEMA = Map.class; 376 /** 377 * A list of ports that can run workers on this supervisor. Each worker uses one port, and 378 * the supervisor will only run one worker per port. Use this configuration to tune 379 * how many workers run on each machine. 
380 */ 381 public static final String SUPERVISOR_SLOTS_PORTS = "supervisor.slots.ports"; 382 public static final Object SUPERVISOR_SLOTS_PORTS_SCHEMA = ConfigValidation.NumbersValidator; 383 384 385 /** 386 * This parameter is used by the storm-deploy project to configure the 387 * jvm options for the supervisor daemon. 388 */ 389 public static final String SUPERVISOR_CHILDOPTS = "supervisor.childopts"; 390 public static final Object SUPERVISOR_CHILDOPTS_SCHEMA = String.class; 391 392 393 /** 394 * How long a worker can go without heartbeating before the supervisor tries to 395 * restart the worker process. 396 */ 397 public static final String SUPERVISOR_WORKER_TIMEOUT_SECS = "supervisor.worker.timeout.secs"; 398 public static final Object SUPERVISOR_WORKER_TIMEOUT_SECS_SCHEMA = Number.class; 399 400 401 /** 402 * How long a worker can go without heartbeating during the initial launch before 403 * the supervisor tries to restart the worker process. This value override 404 * supervisor.worker.timeout.secs during launch because there is additional 405 * overhead to starting and configuring the JVM on launch. 406 */ 407 public static final String SUPERVISOR_WORKER_START_TIMEOUT_SECS = "supervisor.worker.start.timeout.secs"; 408 public static final Object SUPERVISOR_WORKER_START_TIMEOUT_SECS_SCHEMA = Number.class; 409 410 411 /** 412 * Whether or not the supervisor should launch workers assigned to it. Defaults 413 * to true -- and you should probably never change this value. This configuration 414 * is used in the Storm unit tests. 415 */ 416 public static final String SUPERVISOR_ENABLE = "supervisor.enable"; 417 public static final Object SUPERVISOR_ENABLE_SCHEMA = Boolean.class; 418 419 420 /** 421 * how often the supervisor sends a heartbeat to the master. 
422 */ 423 public static final String SUPERVISOR_HEARTBEAT_FREQUENCY_SECS = "supervisor.heartbeat.frequency.secs"; 424 public static final Object SUPERVISOR_HEARTBEAT_FREQUENCY_SECS_SCHEMA = Number.class; 425 426 427 /** 428 * How often the supervisor checks the worker heartbeats to see if any of them 429 * need to be restarted. 430 */ 431 public static final String SUPERVISOR_MONITOR_FREQUENCY_SECS = "supervisor.monitor.frequency.secs"; 432 public static final Object SUPERVISOR_MONITOR_FREQUENCY_SECS_SCHEMA = Number.class; 433 434 /** 435 * The jvm opts provided to workers launched by this supervisor. All "%ID%" substrings are replaced 436 * with an identifier for this worker. 437 */ 438 public static final String WORKER_CHILDOPTS = "worker.childopts"; 439 public static final Object WORKER_CHILDOPTS_SCHEMA = String.class; 440 441 442 /** 443 * How often this worker should heartbeat to the supervisor. 444 */ 445 public static final String WORKER_HEARTBEAT_FREQUENCY_SECS = "worker.heartbeat.frequency.secs"; 446 public static final Object WORKER_HEARTBEAT_FREQUENCY_SECS_SCHEMA = Number.class; 447 448 /** 449 * How often a task should heartbeat its status to the master. 450 */ 451 public static final String TASK_HEARTBEAT_FREQUENCY_SECS = "task.heartbeat.frequency.secs"; 452 public static final Object TASK_HEARTBEAT_FREQUENCY_SECS_SCHEMA = Number.class; 453 454 455 /** 456 * How often a task should sync its connections with other tasks (if a task is 457 * reassigned, the other tasks sending messages to it need to refresh their connections). 458 * In general though, when a reassignment happens other tasks will be notified 459 * almost immediately. This configuration is here just in case that notification doesn't 460 * come through. 461 */ 462 public static final String TASK_REFRESH_POLL_SECS = "task.refresh.poll.secs"; 463 public static final Object TASK_REFRESH_POLL_SECS_SCHEMA = Number.class; 464 465 466 467 /** 468 * True if Storm should timeout messages or not. 
Defaults to true. This is meant to be used 469 * in unit tests to prevent tuples from being accidentally timed out during the test. 470 */ 471 public static final String TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS = "topology.enable.message.timeouts"; 472 public static final Object TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS_SCHEMA = Boolean.class; 473 474 /** 475 * When set to true, Storm will log every message that's emitted. 476 */ 477 public static final String TOPOLOGY_DEBUG = "topology.debug"; 478 public static final Object TOPOLOGY_DEBUG_SCHEMA = Boolean.class; 479 480 481 /** 482 * Whether or not the master should optimize topologies by running multiple 483 * tasks in a single thread where appropriate. 484 */ 485 public static final String TOPOLOGY_OPTIMIZE = "topology.optimize"; 486 public static final Object TOPOLOGY_OPTIMIZE_SCHEMA = Boolean.class; 487 488 /** 489 * How many processes should be spawned around the cluster to execute this 490 * topology. Each process will execute some number of tasks as threads within 491 * them. This parameter should be used in conjunction with the parallelism hints 492 * on each component in the topology to tune the performance of a topology. 493 */ 494 public static final String TOPOLOGY_WORKERS = "topology.workers"; 495 public static final Object TOPOLOGY_WORKERS_SCHEMA = Number.class; 496 497 /** 498 * How many instances to create for a spout/bolt. A task runs on a thread with zero or more 499 * other tasks for the same spout/bolt. The number of tasks for a spout/bolt is always 500 * the same throughout the lifetime of a topology, but the number of executors (threads) for 501 * a spout/bolt can change over time. This allows a topology to scale to more or less resources 502 * without redeploying the topology or violating the constraints of Storm (such as a fields grouping 503 * guaranteeing that the same value goes to the same task). 
504 */ 505 public static final String TOPOLOGY_TASKS = "topology.tasks"; 506 public static final Object TOPOLOGY_TASKS_SCHEMA = Number.class; 507 508 /** 509 * How many executors to spawn for ackers. 510 * 511 * <p>If this is set to 0, then Storm will immediately ack tuples as soon 512 * as they come off the spout, effectively disabling reliability.</p> 513 */ 514 public static final String TOPOLOGY_ACKER_EXECUTORS = "topology.acker.executors"; 515 public static final Object TOPOLOGY_ACKER_EXECUTORS_SCHEMA = Number.class; 516 517 518 /** 519 * The maximum amount of time given to the topology to fully process a message 520 * emitted by a spout. If the message is not acked within this time frame, Storm 521 * will fail the message on the spout. Some spouts implementations will then replay 522 * the message at a later time. 523 */ 524 public static final String TOPOLOGY_MESSAGE_TIMEOUT_SECS = "topology.message.timeout.secs"; 525 public static final Object TOPOLOGY_MESSAGE_TIMEOUT_SECS_SCHEMA = Number.class; 526 527 /** 528 * A list of serialization registrations for Kryo ( http://code.google.com/p/kryo/ ), 529 * the underlying serialization framework for Storm. A serialization can either 530 * be the name of a class (in which case Kryo will automatically create a serializer for the class 531 * that saves all the object's fields), or an implementation of com.esotericsoftware.kryo.Serializer. 532 * 533 * See Kryo's documentation for more information about writing custom serializers. 534 */ 535 public static final String TOPOLOGY_KRYO_REGISTER = "topology.kryo.register"; 536 public static final Object TOPOLOGY_KRYO_REGISTER_SCHEMA = ConfigValidation.StringsValidator; 537 538 /** 539 * A list of classes that customize storm's kryo instance during start-up. 540 * Each listed class name must implement IKryoDecorator. 
During start-up the 541 * listed class is instantiated with 0 arguments, then its 'decorate' method 542 * is called with storm's kryo instance as the only argument. 543 */ 544 public static final String TOPOLOGY_KRYO_DECORATORS = "topology.kryo.decorators"; 545 public static final Object TOPOLOGY_KRYO_DECORATORS_SCHEMA = ConfigValidation.StringsValidator; 546 547 /** 548 * Class that specifies how to create a Kryo instance for serialization. Storm will then apply 549 * topology.kryo.register and topology.kryo.decorators on top of this. The default implementation 550 * implements topology.fall.back.on.java.serialization and turns references off. 551 */ 552 public static final String TOPOLOGY_KRYO_FACTORY = "topology.kryo.factory"; 553 public static final Object TOPOLOGY_KRYO_FACTORY_SCHEMA = String.class; 554 555 556 /** 557 * Whether or not Storm should skip the loading of kryo registrations for which it 558 * does not know the class or have the serializer implementation. Otherwise, the task will 559 * fail to load and will throw an error at runtime. The use case of this is if you want to 560 * declare your serializations on the storm.yaml files on the cluster rather than every single 561 * time you submit a topology. Different applications may use different serializations and so 562 * a single application may not have the code for the other serializers used by other apps. 563 * By setting this config to true, Storm will ignore that it doesn't have those other serializations 564 * rather than throw an error. 565 */ 566 public static final String TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS= "topology.skip.missing.kryo.registrations"; 567 public static final Object TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS_SCHEMA = Boolean.class; 568 569 /* 570 * A list of classes implementing IMetricsConsumer (See storm.yaml.example for exact config format). 571 * Each listed class will be routed all the metrics data generated by the storm metrics API. 
572 * Each listed class maps 1:1 to a system bolt named __metrics_ClassName#N, and it's parallelism is configurable. 573 */ 574 public static final String TOPOLOGY_METRICS_CONSUMER_REGISTER = "topology.metrics.consumer.register"; 575 public static final Object TOPOLOGY_METRICS_CONSUMER_REGISTER_SCHEMA = ConfigValidation.MapsValidator; 576 577 578 /** 579 * The maximum parallelism allowed for a component in this topology. This configuration is 580 * typically used in testing to limit the number of threads spawned in local mode. 581 */ 582 public static final String TOPOLOGY_MAX_TASK_PARALLELISM="topology.max.task.parallelism"; 583 public static final Object TOPOLOGY_MAX_TASK_PARALLELISM_SCHEMA = Number.class; 584 585 586 /** 587 * The maximum number of tuples that can be pending on a spout task at any given time. 588 * This config applies to individual tasks, not to spouts or topologies as a whole. 589 * 590 * A pending tuple is one that has been emitted from a spout but has not been acked or failed yet. 591 * Note that this config parameter has no effect for unreliable spouts that don't tag 592 * their tuples with a message id. 593 */ 594 public static final String TOPOLOGY_MAX_SPOUT_PENDING="topology.max.spout.pending"; 595 public static final Object TOPOLOGY_MAX_SPOUT_PENDING_SCHEMA = Number.class; 596 597 /** 598 * A class that implements a strategy for what to do when a spout needs to wait. Waiting is 599 * triggered in one of two conditions: 600 * 601 * 1. nextTuple emits no tuples 602 * 2. The spout has hit maxSpoutPending and can't emit any more tuples 603 */ 604 public static final String TOPOLOGY_SPOUT_WAIT_STRATEGY="topology.spout.wait.strategy"; 605 public static final Object TOPOLOGY_SPOUT_WAIT_STRATEGY_SCHEMA = String.class; 606 607 /** 608 * The amount of milliseconds the SleepEmptyEmitStrategy should sleep for. 
609 */ 610 public static final String TOPOLOGY_SLEEP_SPOUT_WAIT_STRATEGY_TIME_MS="topology.sleep.spout.wait.strategy.time.ms"; 611 public static final Object TOPOLOGY_SLEEP_SPOUT_WAIT_STRATEGY_TIME_MS_SCHEMA = Number.class; 612 613 /** 614 * The maximum amount of time a component gives a source of state to synchronize before it requests 615 * synchronization again. 616 */ 617 public static final String TOPOLOGY_STATE_SYNCHRONIZATION_TIMEOUT_SECS="topology.state.synchronization.timeout.secs"; 618 public static final Object TOPOLOGY_STATE_SYNCHRONIZATION_TIMEOUT_SECS_SCHEMA = Number.class; 619 620 /** 621 * The percentage of tuples to sample to produce stats for a task. 622 */ 623 public static final String TOPOLOGY_STATS_SAMPLE_RATE="topology.stats.sample.rate"; 624 public static final Object TOPOLOGY_STATS_SAMPLE_RATE_SCHEMA = Number.class; 625 626 /** 627 * The time period that builtin metrics data in bucketed into. 628 */ 629 public static final String TOPOLOGY_BUILTIN_METRICS_BUCKET_SIZE_SECS="topology.builtin.metrics.bucket.size.secs"; 630 public static final Object TOPOLOGY_BUILTIN_METRICS_BUCKET_SIZE_SECS_SCHEMA = Number.class; 631 632 /** 633 * Whether or not to use Java serialization in a topology. 634 */ 635 public static final String TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION="topology.fall.back.on.java.serialization"; 636 public static final Object TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION_SCHEMA = Boolean.class; 637 638 /** 639 * Topology-specific options for the worker child process. This is used in addition to WORKER_CHILDOPTS. 640 */ 641 public static final String TOPOLOGY_WORKER_CHILDOPTS="topology.worker.childopts"; 642 public static final Object TOPOLOGY_WORKER_CHILDOPTS_SCHEMA = String.class; 643 644 /** 645 * This config is available for TransactionalSpouts, and contains the id ( a String) for 646 * the transactional topology. This id is used to store the state of the transactional 647 * topology in Zookeeper. 
648 */ 649 public static final String TOPOLOGY_TRANSACTIONAL_ID="topology.transactional.id"; 650 public static final Object TOPOLOGY_TRANSACTIONAL_ID_SCHEMA = String.class; 651 652 /** 653 * A list of task hooks that are automatically added to every spout and bolt in the topology. An example 654 * of when you'd do this is to add a hook that integrates with your internal 655 * monitoring system. These hooks are instantiated using the zero-arg constructor. 656 */ 657 public static final String TOPOLOGY_AUTO_TASK_HOOKS="topology.auto.task.hooks"; 658 public static final Object TOPOLOGY_AUTO_TASK_HOOKS_SCHEMA = ConfigValidation.StringsValidator; 659 660 661 /** 662 * The size of the Disruptor receive queue for each executor. Must be a power of 2. 663 */ 664 public static final String TOPOLOGY_EXECUTOR_RECEIVE_BUFFER_SIZE="topology.executor.receive.buffer.size"; 665 public static final Object TOPOLOGY_EXECUTOR_RECEIVE_BUFFER_SIZE_SCHEMA = ConfigValidation.PowerOf2Validator; 666 667 /** 668 * The maximum number of messages to batch from the thread receiving off the network to the 669 * executor queues. Must be a power of 2. 670 */ 671 public static final String TOPOLOGY_RECEIVER_BUFFER_SIZE="topology.receiver.buffer.size"; 672 public static final Object TOPOLOGY_RECEIVER_BUFFER_SIZE_SCHEMA = ConfigValidation.PowerOf2Validator; 673 674 /** 675 * The size of the Disruptor send queue for each executor. Must be a power of 2. 676 */ 677 public static final String TOPOLOGY_EXECUTOR_SEND_BUFFER_SIZE="topology.executor.send.buffer.size"; 678 public static final Object TOPOLOGY_EXECUTOR_SEND_BUFFER_SIZE_SCHEMA = ConfigValidation.PowerOf2Validator; 679 680 /** 681 * The size of the Disruptor transfer queue for each worker. 
682 */ 683 public static final String TOPOLOGY_TRANSFER_BUFFER_SIZE="topology.transfer.buffer.size"; 684 public static final Object TOPOLOGY_TRANSFER_BUFFER_SIZE_SCHEMA = Number.class; 685 686 /** 687 * How often a tick tuple from the "__system" component and "__tick" stream should be sent 688 * to tasks. Meant to be used as a component-specific configuration. 689 */ 690 public static final String TOPOLOGY_TICK_TUPLE_FREQ_SECS="topology.tick.tuple.freq.secs"; 691 public static final Object TOPOLOGY_TICK_TUPLE_FREQ_SECS_SCHEMA = Number.class; 692 693 694 /** 695 * Configure the wait strategy used for internal queuing. Can be used to tradeoff latency 696 * vs. throughput 697 */ 698 public static final String TOPOLOGY_DISRUPTOR_WAIT_STRATEGY="topology.disruptor.wait.strategy"; 699 public static final Object TOPOLOGY_DISRUPTOR_WAIT_STRATEGY_SCHEMA = String.class; 700 701 /** 702 * The size of the shared thread pool for worker tasks to make use of. The thread pool can be accessed 703 * via the TopologyContext. 704 */ 705 public static final String TOPOLOGY_WORKER_SHARED_THREAD_POOL_SIZE="topology.worker.shared.thread.pool.size"; 706 public static final Object TOPOLOGY_WORKER_SHARED_THREAD_POOL_SIZE_SCHEMA = Number.class; 707 708 /** 709 * The interval in seconds to use for determining whether to throttle error reported to Zookeeper. For example, 710 * an interval of 10 seconds with topology.max.error.report.per.interval set to 5 will only allow 5 errors to be 711 * reported to Zookeeper per task for every 10 second interval of time. 
712 */ 713 public static final String TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS="topology.error.throttle.interval.secs"; 714 public static final Object TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS_SCHEMA = Number.class; 715 716 /** 717 * See doc for TOPOLOGY_ERROR_THROTTLE_INTERVAL_SECS 718 */ 719 public static final String TOPOLOGY_MAX_ERROR_REPORT_PER_INTERVAL="topology.max.error.report.per.interval"; 720 public static final Object TOPOLOGY_MAX_ERROR_REPORT_PER_INTERVAL_SCHEMA = Number.class; 721 722 723 /** 724 * How often a batch can be emitted in a Trident topology. 725 */ 726 public static final String TOPOLOGY_TRIDENT_BATCH_EMIT_INTERVAL_MILLIS="topology.trident.batch.emit.interval.millis"; 727 public static final Object TOPOLOGY_TRIDENT_BATCH_EMIT_INTERVAL_MILLIS_SCHEMA = Number.class; 728 729 /** 730 * Name of the topology. This config is automatically set by Storm when the topology is submitted. 731 */ 732 public static final String TOPOLOGY_NAME="topology.name"; 733 public static final Object TOPOLOGY_NAME_SCHEMA = String.class; 734 735 /** 736 * Max pending tuples in one ShellBolt 737 */ 738 public static final String TOPOLOGY_SHELLBOLT_MAX_PENDING="topology.shellbolt.max.pending"; 739 public static final Object TOPOLOGY_SHELLBOLT_MAX_PENDING_SCHEMA = Number.class; 740 741 /** 742 * The root directory in ZooKeeper for metadata about TransactionalSpouts. 743 */ 744 public static final String TRANSACTIONAL_ZOOKEEPER_ROOT="transactional.zookeeper.root"; 745 public static final Object TRANSACTIONAL_ZOOKEEPER_ROOT_SCHEMA = String.class; 746 747 /** 748 * The list of zookeeper servers in which to keep the transactional state. 
If null (which is default), 749 * will use storm.zookeeper.servers 750 */ 751 public static final String TRANSACTIONAL_ZOOKEEPER_SERVERS="transactional.zookeeper.servers"; 752 public static final Object TRANSACTIONAL_ZOOKEEPER_SERVERS_SCHEMA = ConfigValidation.StringsValidator; 753 754 /** 755 * The port to use to connect to the transactional zookeeper servers. If null (which is default), 756 * will use storm.zookeeper.port 757 */ 758 public static final String TRANSACTIONAL_ZOOKEEPER_PORT="transactional.zookeeper.port"; 759 public static final Object TRANSACTIONAL_ZOOKEEPER_PORT_SCHEMA = Number.class; 760 761 /** 762 * The number of threads that should be used by the zeromq context in each worker process. 763 */ 764 public static final String ZMQ_THREADS = "zmq.threads"; 765 public static final Object ZMQ_THREADS_SCHEMA = Number.class; 766 767 /** 768 * How long a connection should retry sending messages to a target host when 769 * the connection is closed. This is an advanced configuration and can almost 770 * certainly be ignored. 771 */ 772 public static final String ZMQ_LINGER_MILLIS = "zmq.linger.millis"; 773 public static final Object ZMQ_LINGER_MILLIS_SCHEMA = Number.class; 774 775 /** 776 * The high water for the ZeroMQ push sockets used for networking. Use this config to prevent buffer explosion 777 * on the networking layer. 778 */ 779 public static final String ZMQ_HWM = "zmq.hwm"; 780 public static final Object ZMQ_HWM_SCHEMA = Number.class; 781 782 /** 783 * This value is passed to spawned JVMs (e.g., Nimbus, Supervisor, and Workers) 784 * for the java.library.path value. java.library.path tells the JVM where 785 * to look for native libraries. It is necessary to set this config correctly since 786 * Storm uses the ZeroMQ and JZMQ native libs. 
787 */
    public static final String JAVA_LIBRARY_PATH = "java.library.path";
    public static final Object JAVA_LIBRARY_PATH_SCHEMA = String.class;

    /**
     * The path to use as the zookeeper dir when running a zookeeper server via
     * "storm dev-zookeeper". This zookeeper instance is only intended for development;
     * it is not a production grade zookeeper setup.
     */
    public static final String DEV_ZOOKEEPER_PATH = "dev.zookeeper.path";
    public static final Object DEV_ZOOKEEPER_PATH_SCHEMA = String.class;

    /**
     * A map from topology name to the number of machines that should be dedicated for that topology. Set storm.scheduler
     * to backtype.storm.scheduler.IsolationScheduler to make use of the isolation scheduler.
     */
    public static final String ISOLATION_SCHEDULER_MACHINES = "isolation.scheduler.machines";
    public static final Object ISOLATION_SCHEDULER_MACHINES_SCHEMA = Map.class;

    /**
     * Stores the debug flag under {@code TOPOLOGY_DEBUG} in the given config map.
     *
     * @param conf the config map to modify
     * @param isOn the value to store
     */
    public static void setDebug(Map conf, boolean isOn) {
        conf.put(Config.TOPOLOGY_DEBUG, isOn);
    }

    /**
     * Instance form of {@link #setDebug(Map, boolean)}, applied to this config.
     *
     * @param isOn the value to store
     */
    public void setDebug(boolean isOn) {
        setDebug(this, isOn);
    }

    /**
     * Stores the flag under {@code TOPOLOGY_OPTIMIZE} in this config.
     *
     * @param isOn the value to store
     * @deprecated marked deprecated in the original source; no replacement is indicated there.
     */
    @Deprecated
    public void setOptimize(boolean isOn) {
        put(Config.TOPOLOGY_OPTIMIZE, isOn);
    }

    /**
     * Stores the worker count under {@code TOPOLOGY_WORKERS} in the given config map.
     *
     * @param conf    the config map to modify
     * @param workers the value to store
     */
    public static void setNumWorkers(Map conf, int workers) {
        conf.put(Config.TOPOLOGY_WORKERS, workers);
    }

    /**
     * Instance form of {@link #setNumWorkers(Map, int)}, applied to this config.
     *
     * @param workers the value to store
     */
    public void setNumWorkers(int workers) {
        setNumWorkers(this, workers);
    }

    /**
     * Stores the acker executor count under {@code TOPOLOGY_ACKER_EXECUTORS} in the given config map.
     *
     * @param conf         the config map to modify
     * @param numExecutors the value to store
     */
    public static void setNumAckers(Map conf, int numExecutors) {
        conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, numExecutors);
    }

    /**
     * Instance form of {@link #setNumAckers(Map, int)}, applied to this config.
     *
     * @param numExecutors the value to store
     */
    public void setNumAckers(int numExecutors) {
        setNumAckers(this, numExecutors);
    }

    /**
     * Stores the timeout under {@code TOPOLOGY_MESSAGE_TIMEOUT_SECS} in the given config map.
     *
     * @param conf the config map to modify
     * @param secs the value to store
     */
    public static void setMessageTimeoutSecs(Map conf, int secs) {
        conf.put(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, secs);
    }

    /**
     * Instance form of {@link #setMessageTimeoutSecs(Map, int)}, applied to this config.
     *
     * @param secs the value to store
     */
    public void setMessageTimeoutSecs(int secs) {
        setMessageTimeoutSecs(this, secs);
    }

    /**
     * Appends {@code klass.getName()} to the list kept under {@code TOPOLOGY_KRYO_REGISTER}.
     * The existing list is copied first and the copy is written back to {@code conf}.
     *
     * @param conf  the config map to modify
     * @param klass the class whose name is registered
     */
    public static void registerSerialization(Map conf, Class klass) {
        getRegisteredSerializations(conf).add(klass.getName());
    }

    /**
     * Instance form of {@link #registerSerialization(Map, Class)}, applied to this config.
     *
     * @param klass the class whose name is registered
     */
    public void registerSerialization(Class klass) {
        registerSerialization(this, klass);
    }

    /**
     * Appends a single-entry map (class name to serializer class name) to the list kept
     * under {@code TOPOLOGY_KRYO_REGISTER}. The existing list is copied first and the copy
     * is written back to {@code conf}.
     *
     * @param conf            the config map to modify
     * @param klass           the class whose name is used as the entry key
     * @param serializerClass the serializer class whose name is used as the entry value
     */
    public static void registerSerialization(Map conf, Class klass, Class<? extends Serializer> serializerClass) {
        Map<String, String> register = new HashMap<String, String>();
        register.put(klass.getName(), serializerClass.getName());
        getRegisteredSerializations(conf).add(register);
    }

    /**
     * Instance form of {@link #registerSerialization(Map, Class, Class)}, applied to this config.
     *
     * @param klass           the class whose name is used as the entry key
     * @param serializerClass the serializer class whose name is used as the entry value
     */
    public void registerSerialization(Class klass, Class<? extends Serializer> serializerClass) {
        registerSerialization(this, klass, serializerClass);
    }

    /**
     * Appends a metrics consumer description to the list kept under
     * {@code TOPOLOGY_METRICS_CONSUMER_REGISTER}, creating the list when it is absent.
     * The description is a map with the keys "class", "parallelism.hint" and "argument".
     *
     * <p>The class is recorded with {@link Class#getName()}, matching the other register
     * methods of this class. {@code getCanonicalName()} returns null for anonymous and
     * local classes and a dotted (non-binary) name for nested classes.
     * NOTE(review): presumably the consumer is later loaded reflectively by this name;
     * confirm against the code that reads this entry.</p>
     *
     * @param klass           the metrics consumer class
     * @param argument        value stored under "argument"; may be null
     * @param parallelismHint value stored under "parallelism.hint"
     */
    public void registerMetricsConsumer(Class klass, Object argument, long parallelismHint) {
        HashMap m = new HashMap();
        m.put("class", klass.getName());
        m.put("parallelism.hint", parallelismHint);
        m.put("argument", argument);

        List l = (List) this.get(TOPOLOGY_METRICS_CONSUMER_REGISTER);
        if (l == null) {
            l = new ArrayList();
        }
        l.add(m);
        this.put(TOPOLOGY_METRICS_CONSUMER_REGISTER, l);
    }

    /**
     * Registers a metrics consumer with a null argument.
     *
     * @param klass           the metrics consumer class
     * @param parallelismHint value stored under "parallelism.hint"
     */
    public void registerMetricsConsumer(Class klass, long parallelismHint) {
        registerMetricsConsumer(klass, null, parallelismHint);
    }

    /**
     * Registers a metrics consumer with a null argument and a parallelism hint of 1.
     *
     * @param klass the metrics consumer class
     */
    public void registerMetricsConsumer(Class klass) {
        registerMetricsConsumer(klass, null, 1L);
    }

    /**
     * Appends {@code klass.getName()} to the list kept under {@code TOPOLOGY_KRYO_DECORATORS}.
     * The existing list is copied first and the copy is written back to {@code conf}.
     *
     * @param conf  the config map to modify
     * @param klass the decorator class whose name is registered
     */
    public static void registerDecorator(Map conf, Class<? extends IKryoDecorator> klass) {
        getRegisteredDecorators(conf).add(klass.getName());
    }

    /**
     * Instance form of {@link #registerDecorator(Map, Class)}, applied to this config.
     *
     * @param klass the decorator class whose name is registered
     */
    public void registerDecorator(Class<? extends IKryoDecorator> klass) {
        registerDecorator(this, klass);
    }

    /**
     * Stores {@code klass.getName()} under {@code TOPOLOGY_KRYO_FACTORY} in the given config map.
     *
     * @param conf  the config map to modify
     * @param klass the factory class whose name is stored
     */
    public static void setKryoFactory(Map conf, Class<? extends IKryoFactory> klass) {
        conf.put(Config.TOPOLOGY_KRYO_FACTORY, klass.getName());
    }

    /**
     * Instance form of {@link #setKryoFactory(Map, Class)}, applied to this config.
     *
     * @param klass the factory class whose name is stored
     */
    public void setKryoFactory(Class<? extends IKryoFactory> klass) {
        setKryoFactory(this, klass);
    }

    /**
     * Stores the flag under {@code TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS} in the given config map.
     *
     * @param conf the config map to modify
     * @param skip the value to store
     */
    public static void setSkipMissingKryoRegistrations(Map conf, boolean skip) {
        conf.put(Config.TOPOLOGY_SKIP_MISSING_KRYO_REGISTRATIONS, skip);
    }

    /**
     * Instance form of {@link #setSkipMissingKryoRegistrations(Map, boolean)}, applied to this config.
     *
     * @param skip the value to store
     */
    public void setSkipMissingKryoRegistrations(boolean skip) {
        setSkipMissingKryoRegistrations(this, skip);
    }

    /**
     * Stores the limit under {@code TOPOLOGY_MAX_TASK_PARALLELISM} in the given config map.
     *
     * @param conf the config map to modify
     * @param max  the value to store
     */
    public static void setMaxTaskParallelism(Map conf, int max) {
        conf.put(Config.TOPOLOGY_MAX_TASK_PARALLELISM, max);
    }

    /**
     * Instance form of {@link #setMaxTaskParallelism(Map, int)}, applied to this config.
     *
     * @param max the value to store
     */
    public void setMaxTaskParallelism(int max) {
        setMaxTaskParallelism(this, max);
    }

    /**
     * Stores the limit under {@code TOPOLOGY_MAX_SPOUT_PENDING} in the given config map.
     *
     * @param conf the config map to modify
     * @param max  the value to store
     */
    public static void setMaxSpoutPending(Map conf, int max) {
        conf.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, max);
    }

    /**
     * Instance form of {@link #setMaxSpoutPending(Map, int)}, applied to this config.
     *
     * @param max the value to store
     */
    public void setMaxSpoutPending(int max) {
        setMaxSpoutPending(this, max);
    }

    /**
     * Stores the rate under {@code TOPOLOGY_STATS_SAMPLE_RATE} in the given config map.
     *
     * @param conf the config map to modify
     * @param rate the value to store
     */
    public static void setStatsSampleRate(Map conf, double rate) {
        conf.put(Config.TOPOLOGY_STATS_SAMPLE_RATE, rate);
    }

    /**
     * Instance form of {@link #setStatsSampleRate(Map, double)}, applied to this config.
     *
     * @param rate the value to store
     */
    public void setStatsSampleRate(double rate) {
        setStatsSampleRate(this, rate);
    }

    /**
     * Stores the flag under {@code TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION} in the given config map.
     *
     * @param conf     the config map to modify
     * @param fallback the value to store
     */
    public static void setFallBackOnJavaSerialization(Map conf, boolean fallback) {
        conf.put(Config.TOPOLOGY_FALL_BACK_ON_JAVA_SERIALIZATION, fallback);
    }

    /**
     * Instance form of {@link #setFallBackOnJavaSerialization(Map, boolean)}, applied to this config.
     *
     * @param fallback the value to store
     */
    public void setFallBackOnJavaSerialization(boolean fallback) {
        setFallBackOnJavaSerialization(this, fallback);
    }

    /**
     * Returns a fresh mutable copy of the list kept under {@code TOPOLOGY_KRYO_REGISTER},
     * after storing that copy back into {@code conf}.
     */
    private static List getRegisteredSerializations(Map conf) {
        return copyRegisteredList(conf, Config.TOPOLOGY_KRYO_REGISTER);
    }

    /**
     * Returns a fresh mutable copy of the list kept under {@code TOPOLOGY_KRYO_DECORATORS},
     * after storing that copy back into {@code conf}.
     */
    private static List getRegisteredDecorators(Map conf) {
        return copyRegisteredList(conf, Config.TOPOLOGY_KRYO_DECORATORS);
    }

    /**
     * Replaces the list stored under {@code key} with a new ArrayList holding the same
     * elements and returns that new list. A missing key and a key mapped to null both
     * yield an empty list; the null case previously caused a NullPointerException in
     * the ArrayList copy constructor.
     */
    private static List copyRegisteredList(Map conf, String key) {
        Object existing = conf.get(key);
        List ret = (existing == null) ? new ArrayList() : new ArrayList((List) existing);
        conf.put(key, ret);
        return ret;
    }
}
转自:http://www.aboutyun.com/thread-8109-1-1.html
posted on 2015-12-03 21:13 ilinux_one 阅读(2511) 评论(0) 编辑 收藏 举报