WWW poster 代码公布
以下是 WWW 2017 poster 论文的公开代码,实验数据的格式以下面一个用户的数据为例:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using MathNet.Numerics.Distributions;

namespace single_user_movie
{
    /// <summary>
    /// One record of a user's sequence, read from a tab-separated line
    /// <c>uid \t tid \t time</c>.
    /// </summary>
    public class SequenceEle
    {
        /// <summary>User id (first column of the input line).</summary>
        public string uid;

        /// <summary>Item id (second column). <see cref="DCN.ComputeSelf"/> splits it on ';'.</summary>
        public string tid;

        /// <summary>Dense index of <see cref="tid"/> within the user's sequence (order of first appearance).</summary>
        public int tIndex;

        /// <summary>Timestamp (third column of the input line).</summary>
        public DateTime time;

        /// <summary>Position of this record in the time-ordered sequence.</summary>
        public int timeIndex;
    }

    class Program
    {
        // Users whose sequences are longer than this (in either input) are skipped.
        private const int MaxSequenceLength = 500;

        // Number of latent levels (the "K" passed to both initializers).
        private const int LevelCount = 9;

        // Number of Gibbs sweeps run per user.
        private const int GibbsSweeps = 80;

        // Upper bound used by the original code when averaging over theta.
        private const int MeanLevelBound = 10;

        /// <summary>
        /// Reads the two input files, and for every user found in the second file
        /// fits the model on both sequences and writes one result line per user:
        /// <c>uid \t rounded mean level \t rank of the user's last item</c>.
        /// </summary>
        static void Main(string[] args)
        {
            // The two input paths (tab-separated: uid, tid, time). Fill in before running.
            var in1 = "";
            var in2 = "";

            List<SequenceEle> seqq = new List<SequenceEle>();
            foreach (string line in File.ReadAllLines(in1))
            {
                SequenceEle record = ParseRecord(line.Split('\t'));
                if (record != null)
                {
                    seqq.Add(record);
                }
            }

            // Users are collected in order of first appearance in the second file,
            // including users whose rows fail to parse (they are skipped below).
            List<string> ls = new List<string>();
            HashSet<string> seenUsers = new HashSet<string>();
            List<SequenceEle> seqq1 = new List<SequenceEle>();
            foreach (string line in File.ReadAllLines(in2))
            {
                string[] sp = line.Split('\t');
                if (seenUsers.Add(sp[0]))
                {
                    ls.Add(sp[0]);
                }

                SequenceEle record = ParseRecord(sp);
                if (record != null)
                {
                    seqq1.Add(record);
                }
            }

            // Group once instead of rescanning both datasets for every user.
            ILookup<string, SequenceEle> firstByUser = seqq.ToLookup(dp => dp.uid);
            ILookup<string, SequenceEle> secondByUser = seqq1.ToLookup(dp => dp.uid);

            // Temporary files and the final result file. Fill in before running.
            var input = "";
            var input1 = "";
            var outputFile = "";

            List<string> res = new List<string>();
            foreach (string st in ls)
            {
                // NOTE(review): the per-user data is round-tripped through temporary files
                // using culture-dependent DateTime.ToString(); kept to preserve the original
                // behavior, but an in-memory sort would avoid the I/O.
                List<SequenceEle> seq1 = WriteAndReload(input1, secondByUser[st]);
                if (seq1.Count == 0)
                {
                    // Every row of this user was malformed; the original crashed here on seq1[0].
                    continue;
                }

                DateTime time1 = seq1[0].time;
                DateTime time2 = seq1[seq1.Count - 1].time;

                // Keep only records of the first file that fall inside the time span of seq1.
                IEnumerable<SequenceEle> inWindow = firstByUser[st].Where(
                    dp => DateTime.Compare(time1, dp.time) <= 0 && DateTime.Compare(time2, dp.time) >= 0);
                List<SequenceEle> seq = WriteAndReload(input, inWindow);

                if (seq.Count <= MaxSequenceLength && seq1.Count <= MaxSequenceLength)
                {
                    res.Add(EvaluateUser(seq, seq1));
                }
            }

            // Per the original author's note, the reserved output columns are the user id,
            // the novelty-seeking level, and NDCG / ranking; the code writes uid, the rounded
            // mean level and the 1-based rank of the user's last item.
            File.WriteAllLines(outputFile, res);
        }

        /// <summary>
        /// Builds a record from the split columns of one input line.
        /// Returns null when there are fewer than three columns or the time does not parse.
        /// </summary>
        private static SequenceEle ParseRecord(string[] sp)
        {
            if (sp.Length < 3)
            {
                return null;
            }

            DateTime parsed;
            if (!DateTime.TryParse(sp[2], out parsed))
            {
                return null;
            }

            return new SequenceEle() { uid = sp[0], time = parsed, tid = sp[1] };
        }

        /// <summary>
        /// Writes the records to <paramref name="path"/> (one tab-separated line each),
        /// then reads the file back and returns the parsed records ordered by time.
        /// The writer is always disposed, even if writing throws.
        /// </summary>
        private static List<SequenceEle> WriteAndReload(string path, IEnumerable<SequenceEle> records)
        {
            using (StreamWriter writer = new StreamWriter(path))
            {
                foreach (SequenceEle sq in records)
                {
                    writer.WriteLine(sq.uid.ToString() + '\t' + sq.tid + '\t' + sq.time.ToString());
                }
            }

            return File.ReadAllLines(path)
                .Select(dp => ParseRecord(dp.Split('\t')))
                .Where(dp => dp != null)
                .OrderBy(dp => dp.time)
                .ToList();
        }

        /// <summary>
        /// Assigns <c>timeIndex</c> (position) and <c>tIndex</c> (dense item index in order of
        /// first appearance) to every record and returns the tid-to-index map.
        /// </summary>
        private static Dictionary<string, int> IndexItems(List<SequenceEle> seq)
        {
            var indexByTid = new Dictionary<string, int>();
            for (int i = 0; i < seq.Count; i++)
            {
                int itemIndex;
                if (!indexByTid.TryGetValue(seq[i].tid, out itemIndex))
                {
                    itemIndex = indexByTid.Count;
                    indexByTid.Add(seq[i].tid, itemIndex);
                }

                seq[i].timeIndex = i;
                seq[i].tIndex = itemIndex;
            }

            return indexByTid;
        }

        /// <summary>
        /// Fits the model on both sequences of one user, scores every item of
        /// <paramref name="seq1"/> as a candidate for its last position, and returns the
        /// result line <c>uid \t rounded mean level \t rank of the true last item</c>.
        /// <paramref name="seq1"/> must be non-empty.
        /// </summary>
        private static string EvaluateUser(List<SequenceEle> seq, List<SequenceEle> seq1)
        {
            Dictionary<string, int> tidIndexDic = IndexItems(seq);
            var selfMatrix = DCN.ComputeSelf(seq);
            int n = seq.Count;
            int m = tidIndexDic.Count;
            int K = LevelCount;

            Dictionary<string, int> tidIndexDic1 = IndexItems(seq1);
            var selfMatrix1 = DCN.ComputeSelf(seq1);
            int n1 = seq1.Count;
            int m1 = tidIndexDic1.Count;
            int K1 = LevelCount;

            // NOTE(review): Inference keeps its state in static fields; when initialization
            // fails (for example for an empty seq) values from the previous user remain.
            Inference.InitialParameter(m, n - 1, K);
            Inference.InitialParameter1(m1, n1 - 1, K1);

            // The last record of each sequence is held out; the lists do not change between sweeps.
            List<SequenceEle> train = seq.Take(n - 1).ToList();
            List<SequenceEle> train1 = seq1.Take(n1 - 1).ToList();

            int failedSweeps = 0;
            for (int i = 0; i < GibbsSweeps; i++)
            {
                // Best effort, as in the original: a failing sweep is skipped, not fatal.
                try
                {
                    Inference.GibbsSamplingEach(train, selfMatrix);
                }
                catch (Exception)
                {
                    failedSweeps++;
                }

                try
                {
                    Inference.GibbsSamplingEach1(train1, selfMatrix1);
                }
                catch (Exception)
                {
                    failedSweeps++;
                }
            }

            if (failedSweeps > 0)
            {
                Console.Error.WriteLine("Gibbs sampling: " + failedSweeps + " sweep(s) failed for user " + seq1[0].uid);
            }

            // predict
            double[] thetaP = Inference.theta != null ? Inference.theta.P : new double[0];
            double[] phiRow = null;
            int[] rankRow = null;
            try
            {
                // Transition row of the second-to-last item and its rank row.
                phiRow = Inference.phi_t[seq1[n1 - 2].tIndex].P;
                rankRow = selfMatrix1[n1 - 2];
            }
            catch (Exception)
            {
                // Sequence too short or model not initialized: every score stays 0, as before.
                phiRow = null;
            }

            var predicts = new List<Tuple<int, double>>();
            int levels = Math.Min(K, thetaP.Length);
            for (int x = 0; x < m1; x++)
            {
                double p = 0.0;
                if (phiRow != null && rankRow != null && x < phiRow.Length && x < rankRow.Length)
                {
                    for (int k = 0; k < levels; k++)
                    {
                        p += Math.Pow(10, 10) * thetaP[k] * phiRow[x]
                            * Math.Exp((-1) * Math.Pow((k + 1 - rankRow[x]), 2));
                    }
                }

                predicts.Add(Tuple.Create(x, p));
            }

            predicts = predicts.OrderByDescending(dp => dp.Item2).ToList();

            // Expected level under theta. The original looped to 10 and relied on a swallowed
            // IndexOutOfRangeException; bounding by the array length gives the same sum.
            double mean = 0.0;
            int meanLevels = Math.Min(MeanLevelBound, thetaP.Length);
            for (int k = 0; k < meanLevels; k++)
            {
                mean += (k + 1) * thetaP[k];
            }

            // 1-based position of the held-out last item in the ranked predictions.
            int targetIndex = seq1[n1 - 1].tIndex;
            double MAP = predicts.FindIndex(dp => dp.Item1 == targetIndex) + 1;

            int ns = (int)Math.Round(mean);
            return seq1[0].uid.ToString() + '\t' + ns.ToString() + '\t' + MAP.ToString();
        }
    }

    public class DCN
    {
        /// <summary>
        /// Builds an n-by-m matrix of ranks, where n is the sequence length and m the number
        /// of distinct tids. Row 0 is all ones. For row i &gt; 0 each column j gets a count
        /// derived from token overlaps (tids split on ';') with the first i records; columns
        /// are then ranked by descending count, equal counts sharing a rank (1 = highest).
        /// </summary>
        /// <param name="seq">The sequence to analyse.</param>
        /// <returns>A jagged array with one rank row per record.</returns>
        public static int[][] ComputeSelf(List<SequenceEle> seq)
        {
            int n = seq.Count;
            int m = seq.Select(dp => dp.tid).Distinct().Count();

            int[][] matrix = new int[n][];
            for (int i = 0; i < n; i++)
            {
                matrix[i] = new int[m];
            }

            // Split every tid at most once, and only when it is actually needed.
            string[][] tokenCache = new string[n][];
            Func<int, string[]> tokensAt =
                idx => tokenCache[idx] ?? (tokenCache[idx] = seq[idx].tid.Split(';'));

            for (int i = 0; i < n; i++)
            {
                if (i == 0)
                {
                    for (int j = 0; j < m; j++)
                    {
                        matrix[i][j] = 1;
                    }

                    continue;
                }

                int[] counts = new int[m];
                for (int j = 0; j < m; j++)
                {
                    // NOTE(review): j is a column (distinct-item) index, yet it is used to
                    // index the sequence (seq[j]); kept exactly as in the original.
                    string[] candidate = tokensAt(j);
                    int count = 1;

                    // +1 for every candidate token that occurs in an earlier record k.
                    for (int k = 0; k < i; k++)
                    {
                        string[] earlier = tokensAt(k);
                        foreach (string token in candidate)
                        {
                            if (earlier.Contains(token))
                            {
                                count++;
                            }
                        }
                    }

                    // Transition part: +1 when the candidate overlaps record k and record i-1
                    // overlaps record k-1.
                    for (int k = 1; k < i; k++)
                    {
                        string[] atK = tokensAt(k);
                        string[] beforeK = tokensAt(k - 1);
                        string[] previous = tokensAt(i - 1);

                        bool candidateOverlaps = candidate.Any(token => atK.Contains(token));
                        bool previousOverlaps = previous.Any(token => beforeK.Contains(token));
                        if (candidateOverlaps && previousOverlaps)
                        {
                            count++;
                        }
                    }

                    counts[j] = count;
                }

                // Stable descending sort: ties keep ascending column order.
                List<int> ranked = Enumerable.Range(0, m).OrderByDescending(j => counts[j]).ToList();

                int rank = 1;
                int previousCount = counts[ranked[0]];
                matrix[i][ranked[0]] = 1;
                for (int r = 1; r < ranked.Count; r++)
                {
                    int column = ranked[r];
                    if (counts[column] != previousCount)
                    {
                        rank++;
                        previousCount = counts[column];
                    }

                    matrix[i][column] = rank;
                }
            }

            return matrix;
        }
    }

    /// <summary>
    /// Gibbs sampler state and routines. All state is static and shared; the "_s" fields
    /// belong to the sequence handled by <see cref="InitialParameter"/> /
    /// <see cref="GibbsSamplingEach"/>, the "_t" fields to <see cref="InitialParameter1"/> /
    /// <see cref="GibbsSamplingEach1"/>. <see cref="theta"/>, <see cref="beta"/> and
    /// <see cref="k"/> are shared by both.
    /// </summary>
    public class Inference
    {
        public static int n;
        public static int m;
        public static int n1;
        public static int m1;
        public static int k;
        public static double[] alpha_s;
        public static double[] alpha_t;
        public static double[] beta;
        public static Categorical theta;
        public static Categorical[] phi_s;
        public static Categorical[] phi_t;
        public static int[] zArr_s;
        public static int[] zArr_t;

        /// <summary>
        /// Initializes the "_s" state: draws <see cref="theta"/> from Dirichlet(beta), draws a
        /// level for each of the <paramref name="_n"/> positions, and draws
        /// <paramref name="_m"/> + 1 transition rows from Dirichlet(alpha_s).
        /// Failures are reported on stderr and leave the previous static state in place.
        /// </summary>
        /// <param name="_m">Number of distinct items.</param>
        /// <param name="_n">Number of positions to sample.</param>
        /// <param name="_k">Number of levels.</param>
        public static void InitialParameter(int _m, int _n, int _k)
        {
            m = _m + 1; // one extra row, used for position 0 by the sampler
            n = _n;
            k = _k;
            alpha_s = Enumerable.Range(0, m - 1).Select(dp => 1.0).ToArray();
            beta = Enumerable.Range(0, k).Select(dp => 1.0).ToArray();
            try
            {
                Dirichlet dPhi = new Dirichlet(alpha_s);
                Dirichlet dTheta = new Dirichlet(beta);
                phi_s = new Categorical[m];
                zArr_s = new int[n];
                theta = new Categorical(dTheta.Sample());
                for (int i = 0; i < n; i++)
                {
                    zArr_s[i] = theta.Sample();
                }

                for (int i = 0; i < m; i++)
                {
                    phi_s[i] = new Categorical(dPhi.Sample());
                }
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("InitialParameter failed: " + e.Message);
            }
        }

        /// <summary>
        /// Initializes the "_t" state. Reuses the shared <see cref="theta"/>, so
        /// <see cref="InitialParameter"/> has to have succeeded at least once before.
        /// Failures are reported on stderr and leave the state partially initialized.
        /// </summary>
        /// <param name="_m">Number of distinct items.</param>
        /// <param name="_n">Number of positions to sample.</param>
        /// <param name="_k">Number of levels.</param>
        public static void InitialParameter1(int _m, int _n, int _k)
        {
            m1 = _m + 1; // one extra row, used for position 0 by the sampler
            n1 = _n;
            k = _k;
            alpha_t = Enumerable.Range(0, m1 - 1).Select(dp => 1.0).ToArray();
            try
            {
                Dirichlet dPhi = new Dirichlet(alpha_t);
                phi_t = new Categorical[m1];
                zArr_t = new int[n1];
                for (int i = 0; i < n1; i++)
                {
                    zArr_t[i] = theta.Sample();
                }

                for (int i = 0; i < m1; i++)
                {
                    phi_t[i] = new Categorical(dPhi.Sample());
                }
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("InitialParameter1 failed: " + e.Message);
            }
        }

        /// <summary>
        /// Currently a no-op: the per-iteration call was commented out in the original source.
        /// Kept so existing callers still compile.
        /// </summary>
        public static void GibbsSampling(List<SequenceEle> seq, int[][] matrix, int iter)
        {
            // NOTE(review): intentionally empty; callers use GibbsSamplingEach directly.
        }

        /// <summary>Runs one Gibbs sweep over the "_s" state.</summary>
        /// <param name="seq">Records to sample; must hold at least <see cref="n"/> entries.</param>
        /// <param name="matrix">Rank matrix from <see cref="DCN.ComputeSelf"/>.</param>
        public static void GibbsSamplingEach(List<SequenceEle> seq, int[][] matrix)
        {
            RunSweep(seq, matrix, n, m, phi_s, zArr_s);
        }

        /// <summary>Runs one Gibbs sweep over the "_t" state.</summary>
        /// <param name="seq">Records to sample; must hold at least <see cref="n1"/> entries.</param>
        /// <param name="matrix">Rank matrix from <see cref="DCN.ComputeSelf"/>.</param>
        public static void GibbsSamplingEach1(List<SequenceEle> seq, int[][] matrix)
        {
            RunSweep(seq, matrix, n1, m1, phi_t, zArr_t);
        }

        /// <summary>
        /// One sweep shared by both samplers. For every position it redraws the level z,
        /// then redraws the shared theta from the level counts, then rebuilds every
        /// transition row in <paramref name="phi"/>. <paramref name="phi"/> and
        /// <paramref name="zArr"/> are updated in place.
        /// </summary>
        private static void RunSweep(
            List<SequenceEle> seq, int[][] matrix, int length, int rows, Categorical[] phi, int[] zArr)
        {
            for (int i = 0; i < length; i++)
            {
                // draw z
                double[] thetaP = theta.P;
                int item = seq[i].tIndex;

                // Position 0 has no predecessor and uses the extra last row.
                double phiValue = (i == 0)
                    ? phi[rows - 1].P[item]
                    : phi[seq[i - 1].tIndex].P[item];
                if (phiValue <= 0)
                {
                    phiValue = 1.0 / Math.Pow(10, 100);
                }

                int rank = matrix[i][item];
                double[] p = new double[k];
                for (int z = 0; z < k; z++)
                {
                    double f = Math.Exp((-1) * Math.Pow((z + 1 - rank), 2));
                    p[z] = Math.Pow(10, 10) * thetaP[z] * phiValue * f;
                }

                try
                {
                    Categorical dis = new Categorical(p);
                    zArr[i] = dis.Sample();
                }
                catch (Exception)
                {
                    // Invalid weights: keep the previous level for this position.
                }

                // draw theta
                double[] posterior = new double[k];
                try
                {
                    beta.CopyTo(posterior, 0);
                }
                catch (Exception)
                {
                    // beta missing or of a different length: start from zeros, as before.
                }

                foreach (int level in zArr)
                {
                    posterior[level] += 1;
                }

                theta = new Categorical(new Dirichlet(posterior).Sample());

                // draw phi
                for (int j = 0; j < rows; j++)
                {
                    double[] weights = Enumerable.Repeat(1.0, rows).ToArray();
                    double[] currentRow = null; // read only if some position contributes

                    for (int ii = 0; ii < length; ii++)
                    {
                        int current = seq[ii].tIndex;
                        double f;
                        if (ii == 0)
                        {
                            // Only the extra last row receives the first position.
                            if (j != rows - 1)
                            {
                                continue;
                            }

                            int z = zArr[ii];
                            f = Math.Exp((-1) * Math.Pow(
                                (z - matrix[ii][current] / ((double)matrix[ii].Max()) * k), 2));
                        }
                        else
                        {
                            if (seq[ii - 1].tIndex != j)
                            {
                                continue;
                            }

                            int z = zArr[ii];
                            f = Math.Exp((-1) * Math.Pow((z + 1 - matrix[ii][current]), 2));
                        }

                        if (currentRow == null)
                        {
                            currentRow = phi[j].P;
                        }

                        weights[current] *= currentRow[current] * f;
                    }

                    // Untouched or vanished weights get a tiny positive mass.
                    for (int s = 0; s < weights.Length; s++)
                    {
                        if (weights[s] == 1.0 || weights[s] <= 0)
                        {
                            weights[s] = 1.0 / Math.Pow(10, 100);
                        }
                    }

                    try
                    {
                        phi[j] = new Categorical(weights);
                    }
                    catch (Exception)
                    {
                        // Invalid weights: keep the previous row.
                    }
                }
            }
        }
    }
}