Flink DataStream Source(二)

Flink Source

上下文环境
      import org.apache.flink.api.scala.ExecutionEnvironment
      val env = ExecutionEnvironment.getExecutionEnvironment;//批处理运行上下文环境

      import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
      val streamenv = StreamExecutionEnvironment.getExecutionEnvironment//流处理运行上下文环境
对象、文本、socket Source
      import org.apache.flink.streaming.api.scala.createTypeInformation
      // Object sources: one stream built from literal elements, one from an existing collection.
      streamenv.fromElements[String]("1","2","3","4","5").print()
      streamenv.fromCollection(Array("6","7","8","9","10")).print()
      // Text source: reads the file at the given path.
      streamenv.readTextFile("/data/qujian.csv").print()
      // Socket source: reads text from host/port (the host is masked in this listing).
      streamenv.socketTextStream("***.***.***.***",7777).print()
读Parquet
      import org.apache.flink.formats.parquet.ParquetRowInputFormat
      import org.apache.flink.core.fs.Path
//      long 可以用 INT64 替换
//      |-- a: string (nullable = true)
//      |-- b: long (nullable = true)
//      |-- c: string (nullable = true)
//      |-- d: string (nullable = true)
//      |-- e: long (nullable = true)
//      |-- f: long (nullable = true)
//      |-- g: string (nullable = true)
//      |-- h: long (nullable = true)
//      |-- i: long (nullable = true)
//      |-- j: integer (nullable = true)
      import org.apache.parquet.schema.{MessageType, PrimitiveType}
      import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
      import org.apache.parquet.schema.Type.Repetition
      val a = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "a")
      val b = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "b")
      val c = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "c")
      val d = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "d")
      val e = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "e")
      val f = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "f")
      val g = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "g")
      val h = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "h")
      val i = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT64, "i")
      val j = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "j")

      val schema = new MessageType("dataschema", a, b,c,d,e,f,g,h,i,j)
      streamenv.readFile(new ParquetRowInputFormat(
         new Path("/data/data.parquet"), schema),"/data/data.parquet").print()
         //没有写错,就是需要写两遍路径
读kafka 方式一
    import org.apache.flink.connector.kafka.source.KafkaSource
    import org.apache.flink.api.common.serialization.SimpleStringSchema
    import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
    import org.apache.flink.api.common.eventtime.WatermarkStrategy
    import org.apache.flink.streaming.api.scala.createTypeInformation
    val source = KafkaSource.builder()
      .setBootstrapServers("127.0.0.1:9092")
      .setTopics("events")
      .setGroupId("group")
      .setStartingOffsets(OffsetsInitializer.earliest())
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .build();
    streamenv.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source").print()
读kafka 方式二
    import org.apache.flink.api.common.serialization.SimpleStringSchema
    import java.util.Properties
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
    val properties = new Properties();
    properties.setProperty("bootstrap.servers""127.0.0.1:9092");
    properties.setProperty("auto.offset.reset""earliest");
    properties.setProperty("group.id""group");
    val kafkaConsumer = new FlinkKafkaConsumer[String]("events",new SimpleStringSchema(),properties);
    streamenv.addSource(kafkaConsumer).print()
读jdbc 方式一
    import org.apache.flink.connector.jdbc.JdbcInputFormat
    import org.apache.flink.api.common.typeinfo.{BasicTypeInfo}
    import org.apache.flink.api.java.typeutils.RowTypeInfo
    val jdbcInputFormat JdbcInputFormat.buildJdbcInputFormat()
      .setDrivername("com.mysql.jdbc.Driver")
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?characterEncoding=UTF-8")
      .setUsername("root").setPassword("123456")
      .setQuery("SELECT id,name,score FROM student")
      .setRowTypeInfo(new RowTypeInfo(BasicTypeInfo.INT_TYPE_INFO, 
           BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO))
      .finish();
    streamenv.createInput(jdbcInputFormat).print()
读jdbc 方式二
    import java.sql.{Connection, DriverManager, PreparedStatement}
    import org.apache.flink.configuration.Configuration
    import org.apache.flink.streaming.api.functions.source.RichSourceFunction
    import org.apache.flink.streaming.api.functions.source.SourceFunction
    class MyRichSourcejdbc  extends RichSourceFunction[(Int,String,Int)]{
      var conn:Connection = _
      var selectStatement:PreparedStatement = _
      override def open(parameters: Configuration): Unit = {
        conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1:3306/test","root","123456")
        selectStatement = conn.prepareStatement("SELECT id,name,score FROM student");
      }
      override def run(ctx: SourceFunction.SourceContext[(Int,String,Int)]): Unit = {
        val resultSet = selectStatement.executeQuery()
        while (resultSet.next()) {
          ctx.collect(resultSet.getInt(1),resultSet.getString(2),resultSet.getInt(3))
        }
      }
      override def cancel(): Unit = {
        selectStatement.close()
        conn.close()
      }
    }
    // Read from MySQL through the custom JDBC source and print each row.
    streamenv.addSource(new MyRichSourcejdbc()).print()
读redis
      import org.apache.flink.configuration.Configuration
      import org.apache.flink.streaming.api.functions.source.{RichSourceFunction,SourceFunction}
      import redis.clients.jedis.{JedisPool,JedisPoolConfig,Protocol}
      class MyRedisSource extends RichSourceFunction[(StringString)]() {
        var jedisPool:JedisPool = _
        override def open(parameters: Configuration): Unit = {
          jedisPool = new JedisPool(new JedisPoolConfig, "127.0.0.1"6379, Protocol.DEFAULT_TIMEOUT)
        }
        override def run(ctx:SourceFunction.SourceContext[(StringString)]): Unit = {
          val jedis = jedisPool.getResource()
          ctx.collect(("test",jedis.get("test")))
          jedis.close()//实现RichSourceFunction抽象方法,加载数据源数据到流中
        }
        override def cancel(): Unit = {
          jedisPool.close()
        }
      }
      import org.apache.flink.streaming.api.scala.createTypeInformation
      // Attach the custom Redis source and print each record it emits.
      streamenv.addSource(new MyRedisSource()).print()
读redis异步
      import org.apache.flink.configuration.Configuration
      import org.apache.flink.streaming.api.scala.async.{ResultFuture, RichAsyncFunction}
      import redis.clients.jedis.{JedisPool,JedisPoolConfig,Protocol}
      import org.apache.flink.streaming.api.scala.{AsyncDataStream}
      import java.util.concurrent.TimeUnit
      import scala.concurrent.{Future,ExecutionContext}
      //基本逻辑是一样的,但是异步方法你不知道它几时会执行,这也是它最大的缺陷
      //flink就是要实时算出来,这个特点和flink不匹配了
      class RedisAsyncFunction extends  RichAsyncFunction[String,String]{
        var jedisPool:JedisPool = _
        override def open(parameters: Configuration): Unit = {
          jedisPool = new JedisPool(new JedisPoolConfig, "127.0.0.1"6379, Protocol.DEFAULT_TIMEOUT)
        }
        override def asyncInvoke(input: String, resultFuture: ResultFuture[String]): Unit = {
          val jedis = jedisPool.getResource()
          println(jedis.get(input))
          if(jedis.get(input) != null){
            resultFuture.complete(Array(jedis.get(input)))
          }else{
            resultFuture.complete(Array("-99"))
          }
          jedis.close()
        }
        override def close(): Unit = {
          jedisPool.close()
        }
      }
      // Keys to look up; each element is passed to RedisAsyncFunction.asyncInvoke as `input`.
      val stream = streamenv.fromElements[String]("test","test1","test2","test3","test4","test5")
      // Apply the async function with a 10000 ms timeout; the trailing 100 is
      // presumably the max number of in-flight requests — confirm against the AsyncDataStream docs.
      AsyncDataStream.unorderedWait(stream,new RedisAsyncFunction(), 10000, TimeUnit.MILLISECONDS, 100).print()
posted @   Kotlin  阅读(114)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
Live2D
点击右上角即可分享
微信分享提示
西雅图
14:14发布
西雅图
14:14发布
4°
东南风
2级
空气质量
相对湿度
92%
今天
3°/12°
周四
4°/11°
周五
2°/10°