hive (UserMovieRating)> create table if not exists Users(
> UserID int comment 'user id',
> Gender string comment 'user sex',
> Age int comment '1:Under 18,18:18-24,25:25-34,35:35-44,45:45-49,50:50-55,56:56+',
> Occupation int comment '0-20 represents different jobs',
> ZipCode string comment 'your home zip code')
> row format delimited fields terminated by '\t'
> stored as textfile;
OK
Time taken: 0.249 seconds
hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/users.txt' overwrite into table Users;
Copying data from file:/home/landen/MahoutTest/users.txt
Copying file: file:/home/landen/MahoutTest/users.txt
Loading data to table usermovierating.users
Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/users
Table usermovierating.users stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 110208, raw_data_size: 0]
OK
Time taken: 0.745 seconds
hive (UserMovieRating)> select * from Users limit 10;
OK
userid gender age occupation zipcode
1 F 1 10 48067
2 M 56 16 70072
3 M 25 15 55117
4 M 45 7 02460
5 M 25 20 55455
6 F 50 9 55117
7 M 35 1 06810
8 M 25 12 11413
9 M 25 17 61614
10 F 35 1 95370
Time taken: 0.096 seconds
hive (UserMovieRating)> create table if not exists Movies(
> MovieID int comment 'movie id',
> MovieName string comment 'movie name',
> ReleasedDate int comment 'released year',
> MovieType string comment 'movie type')
> row format delimited fields terminated by '\t'
> stored as textfile;
OK
Time taken: 0.183 seconds
hive (UserMovieRating)> show tables;
OK
tab_name
movies
users
Time taken: 0.083 seconds
hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/Processedmovies.txt' overwrite into table Movies;
Copying data from file:/home/landen/MahoutTest/Processedmovies.txt
Copying file: file:/home/landen/MahoutTest/Processedmovies.txt
Loading data to table usermovierating.movies
Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/movies
Table usermovierating.movies stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 155900, raw_data_size: 0]
OK
Time taken: 0.695 seconds
hive (UserMovieRating)> select * from Movies limit 10;
OK
movieid moviename releaseddate movietype
1 Toy Story 1995 Animation,Children's,Comedy
2 Jumanji 1995 Adventure,Children's,Fantasy
3 Grumpier Old Men 1995 Comedy,Romance
4 Waiting to Exhale 1995 Comedy,Drama
5 Father of the Bride Part II 1995 Comedy
6 Heat 1995 Action,Crime,Thriller
7 Sabrina 1995 Comedy,Romance
8 Tom and Huck 1995 Adventure,Children's
9 Sudden Death 1995 Action
10 GoldenEye 1995 Action,Adventure,Thriller
Time taken: 0.095 seconds
hive (UserMovieRating)> create table if not exists Rating(
> UserID int comment 'user id',
> MovieID int comment 'movie id(1-3952)',
> Rating int comment 'Ratings are made on a 5-star scale',
> RatingTime string comment 'user rates time')
> row format delimited fields terminated by '\t'
> stored as textfile;
OK
Time taken: 1.3 seconds
hive (UserMovieRating)> load data local inpath '/home/landen/MahoutTest/ProcessedRating.txt' overwrite into table Rating;
Copying data from file:/home/landen/MahoutTest/ProcessedRating.txt
Copying file: file:/home/landen/MahoutTest/ProcessedRating.txt
Loading data to table usermovierating.rating
Deleted hdfs://Master:9000/home/landen/UntarFile/hive-0.10.0/warehouse/usermovierating.db/rating
Table usermovierating.rating stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 21593504, raw_data_size: 0]
OK
Time taken: 3.293 seconds
hive (UserMovieRating)> select * from Rating limit 10;
OK
userid movieid rating ratingtime
1 1193 5 978300760
1 661 3 978302109
1 914 3 978301968
1 3408 4 978300275
1 2355 5 978824291
1 1197 3 978302268
1 1287 5 978302039
1 2804 5 978300719
1 594 4 978302268
1 919 4 978301368
Time taken: 0.658 seconds
hive (UserMovieRating)> describe users;
OK
col_name data_type comment
userid int user id
gender string user sex
age int 1:Under 18,18:18-24,25:25-34,35:35-44,45:45-49,50:50-55,56:56+
occupation int 0-20 represents different jobs
zipcode string your home zip code
Time taken: 0.514 seconds
hive (UserMovieRating)> describe movies;
OK
col_name data_type comment
movieid int movie id
moviename string movie name
releaseddate int released year
movietype string movie type
Time taken: 0.085 seconds
hive (UserMovieRating)> describe ratings;
FAILED: SemanticException [Error 10001]: Table not found ratings
hive (UserMovieRating)> describe rating;
OK
col_name data_type comment
userid int user id
movieid int movie id(1-3952)
rating int Ratings are made on a 5-star scale
ratingtime string user rates time
Time taken: 0.121 seconds
Users,Movies,Rating三表联合查询:
hive (UserMovieRating)> select u.userid,u.occupation,m.moviename,r.rating
> from rating r
> join users u on r.userid = u.userid
> join movies m on r.movieid = m.movieid;