# PyFlink exercise 1: word count in batch mode

from pyarrow.filesystem import FileSystem
from pyflink.table import EnvironmentSettings, TableEnvironment, DataTypes
from pyflink.table.expressions import lit

# Initialise the environment: build batch-mode settings, then create the
# TableEnvironment that every statement below runs against.
# (An earlier draft built the settings through the builder chain
# EnvironmentSettings.new_instance().use_blink_planner().in_batch_mode().build().)
batch_settings = EnvironmentSettings.in_batch_mode()
t_env = TableEnvironment.create(batch_settings)

# Register the source table: a single STRING column named `word`, read as CSV
# through the filesystem connector from the input path below.
source_ddl = """
CREATE TABLE mySource (
word String
) WITH (
'connector' = 'filesystem',
'format' = 'csv',
'path' = '/home/zhangjunbo/examples/data/word_count_input'
)
"""
t_env.execute_sql(source_ddl)


# Register the sink table: (word, count_value) rows written as CSV through the
# filesystem connector to the output path below.
sink_ddl = """
CREATE TABLE mySink(
word STRING,
count_value BIGINT
) WITH (
'connector' = 'filesystem',
'format' = 'csv',
'path' = '/home/zhangjunbo/examples/data/word_count_output'
)
"""
t_env.execute_sql(sink_ddl)

# Business logic: count how often each word occurs in mySource and write the
# resulting (word, count) rows to mySink.

# Keep the job name that was previously passed to t_env.execute().
t_env.get_config().get_configuration().set_string("pipeline.name", "1-word_count")

source_table = t_env.from_path('mySource')

# Expression objects replace the deprecated string-expression form
# ('word,count(1)'); lit(1).count is the Table API spelling of COUNT(1).
word_counts = source_table \
    .group_by(source_table.word) \
    .select(source_table.word, lit(1).count)

# Execute: execute_insert() submits the job itself, replacing the deprecated
# insert_into() + t_env.execute() pair. wait() blocks until the job has
# finished so the script does not exit before the output is written.
word_counts.execute_insert('mySink').wait()

# Posted 2022-02-10 18:31 by zjb480