/**
 * Computes pairwise Jaccard similarities between items and records those that
 * exceed {@code MIN_SIM} in {@code itemSim}.
 *
 * <p>Item ids are visited in ascending order. Every item except the last one in
 * that order gets a fresh row in {@code itemSim}; the row of {@code id1} only
 * holds partners {@code id2} that come after it, so each unordered pair is
 * evaluated exactly once. A pair in which either item has no feature map is
 * treated as having similarity 0.
 */
private void computeItemSim() {
    List<Integer> sortedItems = new ArrayList<Integer>(items);
    Collections.sort(sortedItems);

    final int n = sortedItems.size();
    for (int i = 0; i < n - 1; i++) {
        int id1 = sortedItems.get(i);
        TIntFloatHashMap row = new TIntFloatHashMap();
        this.itemSim.put(id1, row);

        // Looked up once per outer item instead of once per pair.
        TIntFloatHashMap features1 = this.map_item_intFeatures.get(id1);
        for (int j = i + 1; j < n; j++) {
            int id2 = sortedItems.get(j);
            TIntFloatHashMap features2 = this.map_item_intFeatures.get(id2);

            float val = 0;
            // Short-circuit test; a single get() replaces containsKey() + get().
            if (features1 != null && features2 != null) {
                val = cmpJaccardSim(features1.keySet(), features2.keySet());
            }
            if (val > MIN_SIM) {
                row.put(id2, val);
            }
        }
    }
}
/**
 * Scales a sparse vector to unit Euclidean (L2) length.
 *
 * <p>If the sum of squared values is zero (which includes the empty vector)
 * there is nothing to scale and the input instance itself is returned.
 * Otherwise a newly allocated map is returned and the input is not modified.
 *
 * @param X sparse vector to normalize
 * @return a new map holding each value of {@code X} divided by the L2 norm,
 *         or {@code X} itself when its squared norm is zero
 */
public static TIntFloatMap normalizeVector(TIntFloatMap X) {
    double squaredNorm = 0.0;
    for (float component : X.values()) {
        squaredNorm += (double) component * component;
    }
    if (squaredNorm == 0.0) {
        return X;
    }

    final double length = Math.sqrt(squaredNorm);
    TIntFloatHashMap unit = new TIntFloatHashMap();
    for (int key : X.keys()) {
        unit.put(key, (float) (X.get(key) / length));
    }
    return unit;
}
/** * Construct with the given parameters. * * @param minRad * minimum search radius * @param maxRad * maximum search radius * @param radIncrement * amount to increment search radius by between min and max. * @param nDegree * number of degree increments */ public HoughCircles(int minRad, int maxRad, int radIncrement, int nDegree) { super(); this.minRad = minRad; if (this.minRad <= 0) this.minRad = 1; this.maxRad = maxRad; this.radmap = new TIntObjectHashMap<TIntObjectHashMap<TIntFloatHashMap>>(); this.radIncr = radIncrement; this.nRadius = (maxRad - minRad) / this.radIncr; this.nDegree = nDegree; this.cosanglemap = new float[nRadius][nDegree]; this.sinanglemap = new float[nRadius][nDegree]; for (int radIndex = 0; radIndex < this.nRadius; radIndex++) { for (int angIndex = 0; angIndex < nDegree; angIndex++) { final double ang = angIndex * (2 * PI / nDegree); final double rad = minRad + (radIndex * this.radIncr); this.cosanglemap[radIndex][angIndex] = (float) (rad * cos(ang)); this.sinanglemap[radIndex][angIndex] = (float) (rad * sin(ang)); } } }
/**
 * Computes a distance value between a sparse vector and the stored center of
 * class {@code n}.
 *
 * <p>The result starts at {@code baseDistance / (count * count)}, where
 * {@code count} is the stored count of class {@code n}. Then, for every entry
 * of {@code sv}: if the center has no value for that key, the squared entry
 * value is added; otherwise, with {@code m = centerValue / count}, {@code m * m}
 * is subtracted and {@code (value - m)^2} is added.
 *
 * <p>NOTE(review): presumably the center holds un-averaged sums and
 * {@code baseDistance} is the squared norm of that sum -- confirm against the caller.
 *
 * @param n            class index
 * @param sv           sparse vector to compare with the class center
 * @param baseDistance base value that is divided by {@code count * count}
 * @return the accumulated distance value
 */
private float distanceEuclidean(int n, HashSparseVector sv, float baseDistance) {
    HashSparseVector center = classCenter.get(n);
    int count = classCount.get(n);
    float dist = baseDistance / (count * count);
    TIntFloatHashMap centerData = center.data;

    for (TIntFloatIterator entry = sv.data.iterator(); entry.hasNext();) {
        entry.advance();
        final int key = entry.key();
        final float value = entry.value();
        if (centerData.containsKey(key)) {
            final float mean = centerData.get(key) / count;
            final float diff = value - mean;
            dist -= mean * mean;
            dist += diff * diff;
        } else {
            dist += value * value;
        }
    }
    return dist;
}
/**
 * Updates the stored base distance of a class for the given sparse vector.
 *
 * <p>Starting from the current entry of {@code baseDistList} for
 * {@code classid}, every entry of {@code vector} contributes as follows: if
 * the class center has no value for the key, the squared entry value is added;
 * otherwise, with {@code c} the center value, {@code c * c} is subtracted and
 * {@code (value - c)^2} is added. The result is written back to
 * {@code baseDistList}. The class center itself is not modified here.
 *
 * @param classid index of the class whose base distance is updated
 * @param vector  sparse vector driving the update
 */
private void updateBaseDist(int classid, HashSparseVector vector) {
    float base = baseDistList.get(classid);
    TIntFloatHashMap center = classCenter.get(classid).data;

    for (TIntFloatIterator entry = vector.data.iterator(); entry.hasNext();) {
        entry.advance();
        final int key = entry.key();
        final float value = entry.value();
        if (center.containsKey(key)) {
            final float current = center.get(key);
            final float diff = value - current;
            base -= current * current;
            base += diff * diff;
        } else {
            base += value * value;
        }
    }
    baseDistList.set(classid, base);
}
/**
 * Looks up the weight stored for an unordered pair of ids.
 *
 * <p>The pair is normalized so that the smaller id selects the row of
 * {@code wcc} and the larger id selects the entry inside that row.
 *
 * @param c1 first id
 * @param c2 second id
 * @return 0 when no row exists for the smaller id; otherwise whatever the row
 *         returns for the larger id (the map's no-entry value if it is absent)
 */
private float getweight(int c1, int c2) {
    final int lo = Math.min(c1, c2);
    final int hi = Math.max(c1, c2);
    final TIntFloatHashMap row = wcc.get(lo);
    return (row == null) ? 0f : row.get(hi);
}
/**
 * Splits the keys of a sparse vector into a leading group and a remainder,
 * based on the cumulative share of squared values ("energy").
 *
 * <p>Keys are taken in the order produced by {@link #sort(TIntFloatHashMap)}.
 * Walking that order, each key's squared value divided by the total of all
 * squared values is accumulated; the walk stops at the first key whose
 * accumulated share exceeds {@code thres}. Keys before that position form the
 * first group; that key and everything after it form the second group.
 *
 * @param data  sparse vector
 * @param thres threshold on the accumulated energy share
 * @return a two-row array: row 0 holds the keys before the cut, row 1 holds
 *         the keys from the cut onwards
 */
public static int[][] getTop(TIntFloatHashMap data, float thres) {
    final int[] order = sort(data);
    final int n = order.length;

    // Summed from the last index down to the first.
    final float[] energy = new float[n];
    float total = 0;
    for (int k = n - 1; k >= 0; k--) {
        energy[k] = (float) Math.pow(data.get(order[k]), 2);
        total += energy[k];
    }

    int cut = 0;
    float ratio = 0;
    while (cut < n) {
        ratio += energy[cut] / total;
        if (ratio > thres) {
            break;
        }
        cut++;
    }

    return new int[][] {
        Arrays.copyOfRange(order, 0, cut),
        Arrays.copyOfRange(order, cut, n)
    };
}
/**
 * Returns the keys of a sparse vector ordered by the absolute value of their
 * entries.
 *
 * <p>The absolute values are copied into a boxed map and the actual ordering
 * is delegated to the {@code sort(Map)} overload; according to the original
 * documentation the order is from largest to smallest.
 *
 * @param tmap sparse vector whose keys are to be ordered
 * @return the keys of {@code tmap} in the order returned by {@code sort(Map)}
 */
public static int[] sort(TIntFloatHashMap tmap) {
    HashMap<Integer, Float> magnitudes = new HashMap<Integer, Float>();
    for (TIntFloatIterator cursor = tmap.iterator(); cursor.hasNext();) {
        cursor.advance();
        magnitudes.put(cursor.key(), Math.abs(cursor.value()));
    }

    List<Entry> ranked = sort(magnitudes);
    int[] idx = new int[ranked.size()];
    int pos = 0;
    for (Entry entry : ranked) {
        idx[pos++] = (Integer) entry.getKey();
    }
    return idx;
}
/**
 * Rescales the values of a sparse vector so that they sum to 1.0.
 *
 * <p>Each value is divided by the sum of all values and stored in a newly
 * allocated map; the input is not modified. There is no guard for a zero sum:
 * in that case the float division yields NaN or infinite entries.
 *
 * @param vector sparse vector of (unnormalized) probability values
 * @return a new map with the same keys and each value divided by the total
 */
public static TIntFloatMap normalizeVector(TIntFloatMap vector) {
    float sum = 0;
    for (TFloatIterator values = vector.valueCollection().iterator(); values.hasNext();) {
        sum += values.next();
    }

    TIntFloatMap result = new TIntFloatHashMap(vector.size());
    for (TIntFloatIterator entry = vector.iterator(); entry.hasNext();) {
        entry.advance();
        result.put(entry.key(), entry.value() / sum);
    }
    return result;
}
/**
 * Checks that {@code SemSigUtils.getSortedIndices} returns the keys of a small
 * map ordered by descending value.
 */
@Test
public void testGetSortedIndices() {
    TIntFloatMap weights = new TIntFloatHashMap();
    weights.put(0, 1f);
    weights.put(1, 10f);
    weights.put(2, 5f);
    weights.put(3, 2f);

    // Values in descending order: 10 (key 1), 5 (key 2), 2 (key 3), 1 (key 0).
    int[] expected = {1, 2, 3, 0};
    int[] sorted = SemSigUtils.getSortedIndices(weights);

    assertEquals(expected.length, sorted.length);
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], sorted[i]);
    }
}
/**
 * Converts the stored counts to probabilities in a single pass, to save time.
 *
 * <p>Every value in {@code wordProb} is divided by the size of the alphabet
 * and written back in place, and a {@code Cluster} built from the key, the new
 * value and the key's string form is registered in {@code clusters}. Every
 * value in the nested maps of {@code pcc} is divided by the same denominator.
 */
private void statisticProb() {
    System.out.println("统计概率");
    final float totalword = alpahbet.size();

    for (TIntFloatIterator word = wordProb.iterator(); word.hasNext();) {
        word.advance();
        final float prob = word.value() / totalword;
        word.setValue(prob);
        final int id = word.key();
        clusters.put(id, new Cluster(id, prob, alpahbet.lookupString(id)));
    }

    for (TIntObjectIterator<TIntFloatHashMap> outer = pcc.iterator(); outer.hasNext();) {
        outer.advance();
        for (TIntFloatIterator inner = outer.value().iterator(); inner.hasNext();) {
            inner.advance();
            inner.setValue(inner.value() / totalword);
        }
    }
}
/**
 * Orders the keys of a sparse vector by the magnitude of their values.
 *
 * <p>Each value's absolute value is placed into a boxed map keyed by the same
 * id, and that map is handed to the {@code sort(Map)} overload, which decides
 * the ordering (largest first, according to the original documentation).
 *
 * @param tmap sparse vector to rank
 * @return keys of {@code tmap}, in the order produced by {@code sort(Map)}
 */
public static int[] sort(TIntFloatHashMap tmap) {
    HashMap<Integer, Float> absValues = new HashMap<Integer, Float>();
    for (TIntFloatIterator src = tmap.iterator(); src.hasNext();) {
        src.advance();
        absValues.put(src.key(), Math.abs(src.value()));
    }

    List<Entry> ordered = sort(absValues);
    int[] idx = new int[ordered.size()];
    int next = 0;
    for (Entry e : ordered) {
        idx[next++] = (Integer) e.getKey();
    }
    return idx;
}
/** * Construct with the given parameters. * * @param minRad minimum search radius * @param maxRad maximum search radius */ public HoughCircles(int minRad, int maxRad, int radIncrement, int nDegree) { super(); this.minRad = minRad; if(this.minRad <= 0) this.minRad = 1; this.maxRad = maxRad; this.radmap = new TIntObjectHashMap<TIntObjectHashMap<TIntFloatHashMap>>(); this.radIncr = radIncrement; this.nRadius = (maxRad-minRad) / this.radIncr; this.nDegree = nDegree; this.cosanglemap = new float[nRadius][nDegree]; this.sinanglemap = new float[nRadius][nDegree]; for (int radIndex=0; radIndex<this.nRadius; radIndex++) { for (int angIndex=0; angIndex<nDegree; angIndex++) { double ang = angIndex * (2 * PI / nDegree); double rad = minRad + (radIndex * this.radIncr); this.cosanglemap [radIndex][angIndex] = (float) (rad*cos(ang)); this.sinanglemap [radIndex][angIndex] = (float) (rad*sin(ang)); } } }
/**
 * Writes the feature vectors of all items to a text file.
 *
 * <p>One line is written per item: the item id, a tab, then the item's
 * features as {@code featureId:value} tokens in ascending feature-id order,
 * each followed by a single space. Items that have no entry in
 * {@code map_item_intFeatures} are skipped. An {@link IOException} is reported
 * on standard output and not propagated. The writer is always closed, even
 * when writing fails part-way.
 *
 * @param filename path of the file to create or overwrite
 */
private void writeData(String filename) {
    BufferedWriter writer = null;
    try {
        writer = new BufferedWriter(new FileWriter(filename));
        for (int id : items) {
            TIntFloatHashMap features = map_item_intFeatures.get(id);
            if (features == null) {
                // No features known for this item; nothing meaningful to write.
                continue;
            }

            int[] fIDs = features.keys();
            Arrays.sort(fIDs);

            StringBuilder buf = new StringBuilder();
            buf.append(id).append('\t');
            for (int fID : fIDs) {
                buf.append(fID).append(':').append(features.get(fID)).append(' ');
            }
            writer.append(buf);
            writer.newLine();
        }
        writer.flush();
    } catch (IOException ex) {
        System.out.println(ex.getMessage());
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ex) {
                System.out.println(ex.getMessage());
            }
        }
    }
}
/**
 * Creates a worker for a single user.
 *
 * <p>The constructor performs no computation: every argument is stored in the
 * field of the same name, and {@code originalTrainItems} is initialized to an
 * empty set. The collections and the writer are kept by reference, not copied.
 */
public UserModelRecommenderWorker(int u, BufferedWriter bw,
        TIntObjectHashMap<TIntFloatHashMap> map_item_intFeatures,
        Evaluator trainEval, Evaluator validEval, boolean silent, int topN,
        int num_features, List<Double> listC, List<Double> listEps,
        List<Integer> listSolverType, Map<Integer, Float> userTrainRatings,
        Map<Integer, Float> userValRatings, boolean implicit, int nValidNegEx,
        boolean addNegValidationEx, int timesRealFb, int minTrainEx,
        HashSet<Integer> items, float relUnknownItems, int topK, String metric) {

    // Assigned in parameter order.
    this.u = u;
    this.bw = bw;
    this.map_item_intFeatures = map_item_intFeatures;
    this.trainEval = trainEval;
    this.validEval = validEval;
    this.silent = silent;
    this.topN = topN;
    this.num_features = num_features;
    this.listC = listC;
    this.listEps = listEps;
    this.listSolverType = listSolverType;
    this.userTrainRatings = userTrainRatings;
    this.userValRatings = userValRatings;
    this.implicit = implicit;
    this.nValidNegEx = nValidNegEx;
    this.addNegValidationEx = addNegValidationEx;
    this.timesRealFb = timesRealFb;
    this.minTrainEx = minTrainEx;
    this.items = items;
    this.relUnknownItems = relUnknownItems;
    this.topK = topK;
    this.metric = metric;

    this.originalTrainItems = new HashSet<Integer>();
}
/**
 * Creates a worker for a single user.
 *
 * <p>Each argument is stored, by reference where applicable, in the field of
 * the same name; no other work is done here.
 */
public UserProfileSimilarityRecommenderWorker(int u, HashSet<Integer> items,
        BufferedWriter bw, TIntObjectHashMap<TIntFloatHashMap> map_item_intFeatures,
        int topN, Map<Integer, Float> userTrainRatings, boolean implicit,
        Float evalRatingThresh) {

    // Scalar settings.
    this.u = u;
    this.topN = topN;
    this.implicit = implicit;
    this.evalRatingThresh = evalRatingThresh;

    // Shared data and output.
    this.items = items;
    this.map_item_intFeatures = map_item_intFeatures;
    this.userTrainRatings = userTrainRatings;
    this.bw = bw;
}
/**
 * Computes the cosine similarity of two sparse vectors.
 *
 * <p>The dot product is accumulated over the keys present in both vectors and
 * divided by the product of the two Euclidean norms, each taken over all
 * entries of its vector. When the vectors share no key the result is 0 and no
 * norm is computed.
 *
 * @param v1 first sparse vector
 * @param v2 second sparse vector
 * @return the cosine similarity, or 0 when the key sets do not intersect
 */
private float cmpCosineSim(TIntFloatHashMap v1, TIntFloatHashMap v2) {
    TIntHashSet shared = new TIntHashSet();
    shared.addAll(v1.keySet());
    shared.retainAll(v2.keySet());
    if (shared.isEmpty()) {
        return 0;
    }

    float dot = 0;
    for (TIntIterator keys = shared.iterator(); keys.hasNext();) {
        final int key = keys.next();
        dot += v1.get(key) * v2.get(key);
    }

    float squared1 = 0;
    for (int key : v1.keys()) {
        final float x = v1.get(key);
        squared1 += x * x;
    }

    float squared2 = 0;
    for (int key : v2.keys()) {
        final float y = v2.get(key);
        squared2 += y * y;
    }

    return dot / (float) (Math.sqrt(squared1) * Math.sqrt(squared2));
}
private void loadItemFeatureData(String file_name) { BufferedReader br; try { br = new BufferedReader(new FileReader(file_name)); String line = null; int count = 0; while ((line = br.readLine()) != null) { try { String[] vals = line.split("\t"); int id = Integer.parseInt(vals[0]); if (items.contains(id)) { map_item_intFeatures.put(id, new TIntFloatHashMap()); String[] values = vals[1].trim().split(" "); for (int i = 0; i < values.length; i++) { String[] pair = values[i].split(":"); int fId = Integer.parseInt(pair[0]); float fVal = Float.parseFloat(pair[1]); map_item_intFeatures.get(id).put(fId, fVal); } count++; } } catch (Exception ex) { // System.out.println(ex.getMessage()); // System.out.println(line); } } logger.info("item metadata loaded for evaluation - " + count + " items"); br.close(); } catch (IOException e) { e.printStackTrace(); } }
/**
 * Reads user ratings from a tab-separated file.
 *
 * <p>Each line is {@code userId<TAB>itemId[<TAB>rating]}; when a line has
 * exactly two columns the rating defaults to 1. A later line for the same
 * user/item pair overwrites the earlier rating. Any exception (I/O or parse
 * error) stops reading, prints the stack trace, and the ratings read so far
 * are returned. The reader is always closed.
 *
 * @param file path of the ratings file
 * @return map from user id to that user's {@code itemId -> rating} map;
 *         never {@code null}
 */
public static TIntObjectHashMap<TIntFloatHashMap> loadInputUsersRatings(String file) {
    TIntObjectHashMap<TIntFloatHashMap> user_rating = new TIntObjectHashMap<TIntFloatHashMap>();

    BufferedReader br = null;
    try {
        br = new BufferedReader(new FileReader(file));
        String line;
        while ((line = br.readLine()) != null) {
            String[] vals = line.split("\t");
            float rate = (vals.length == 2) ? 1 : Float.parseFloat(vals[2]);
            int user_id = Integer.parseInt(vals[0]);
            int item_id = Integer.parseInt(vals[1]);

            // Allocate a per-user map only the first time the user is seen.
            TIntFloatHashMap ratings = user_rating.get(user_id);
            if (ratings == null) {
                ratings = new TIntFloatHashMap();
                user_rating.put(user_id, ratings);
            }
            ratings.put(item_id, rate);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return user_rating;
}
/**
 * Reads user ratings from a tab-separated file into the supplied containers.
 *
 * <p>Each line must be {@code userId<TAB>itemId<TAB>rating}. The rating is
 * stored under the user and item in {@code user_rating} (overwriting an
 * earlier rating for the same pair) and also added to {@code labels}. Any
 * exception (I/O or parse error) stops reading and prints the stack trace;
 * entries added before the failure are kept. The reader is always closed.
 *
 * @param file        path of the ratings file
 * @param user_rating output map from user id to {@code itemId -> rating}
 * @param labels      output set collecting every distinct rating value read
 */
public static void loadInputUsersRatings(String file,
        TIntObjectHashMap<TIntFloatHashMap> user_rating, TFloatHashSet labels) {
    BufferedReader br = null;
    try {
        br = new BufferedReader(new FileReader(file));
        String line;
        while ((line = br.readLine()) != null) {
            String[] vals = line.split("\t");
            float rate = Float.parseFloat(vals[2]);
            int user_id = Integer.parseInt(vals[0]);
            int item_id = Integer.parseInt(vals[1]);

            // Allocate a per-user map only the first time the user is seen.
            TIntFloatHashMap ratings = user_rating.get(user_id);
            if (ratings == null) {
                ratings = new TIntFloatHashMap();
                user_rating.put(user_id, ratings);
            }
            ratings.put(item_id, rate);
            labels.add(rate);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Constructor.
 *
 * <p>Stores every argument in the field of the same name. Nothing is copied
 * or computed here; maps, lists and writers are kept by reference.
 */
public UserPathExtractorWorker(int user_id, TIntFloatHashMap trainRatings,
        TIntFloatHashMap validationRatings, ArrayList<String> items_id,
        BufferedWriter train_file, BufferedWriter validation_file,
        BufferedWriter test_file, boolean normalize,
        THashMap<String, String> items_path_index, String path_file,
        TObjectIntHashMap<String> path_index, THashMap<String, String> paths,
        int user_items_sampling, float ratesThreshold,
        TIntObjectHashMap<TIntHashSet> items_link) {

    // Assigned in parameter order.
    this.user_id = user_id;
    this.trainRatings = trainRatings;
    this.validationRatings = validationRatings;
    this.items_id = items_id;
    this.train_file = train_file;
    this.validation_file = validation_file;
    this.test_file = test_file;
    this.normalize = normalize;
    this.items_path_index = items_path_index;
    this.path_file = path_file;
    this.path_index = path_index;
    this.paths = paths;
    this.user_items_sampling = user_items_sampling;
    this.ratesThreshold = ratesThreshold;
    this.items_link = items_link;
}
private TIntFloatMap makeOutlinkVector(TIntSet links) { TIntFloatMap vector = new TIntFloatHashMap(); for (int wpId : links.toArray()) { vector.put(wpId, (float) Math.log(1.0 * linkCache.getTotalPages() / linkCache .getInlinks(wpId).size())); } if(wlmExtended) { TIntFloatMap vector2 = new TIntFloatHashMap(); //考虑二级链接 for (int id1 : links.toArray()) { for (int id2 : linkCache.getOutlinks(id1).toArray()) { double tfidf = Math.log(1.0 * linkCache.getTotalPages() / linkCache.getInlinks(id2).size()); float w = (float) tfidf * vector.get(id1); float old = 0; if (vector2.containsKey(id2)) { old = vector2.get(id2); } vector2.put(id2, old + w); } } vector.putAll(vector2); } return vector; }
/**
 * Converts the stored counts to probabilities in a single pass, to save time.
 *
 * <p>Every value in {@code wordProb} is divided by {@code totalword} and
 * written back in place. For keys that are not negative, a {@code Cluster}
 * built from the key, the new value and the key's string form is registered in
 * {@code clusters}; negative keys are rescaled but get no cluster. Every value
 * in the nested maps of {@code pcc} is divided by {@code totalword} as well.
 */
private void statisticProb() {
    System.out.println("统计概率");

    for (TIntFloatIterator word = wordProb.iterator(); word.hasNext();) {
        word.advance();
        final float prob = word.value() / totalword;
        word.setValue(prob);
        final int id = word.key();
        if (id >= 0) {
            clusters.put(id, new Cluster(id, prob, alpahbet.lookupString(id)));
        }
    }

    for (TIntObjectIterator<TIntFloatHashMap> outer = pcc.iterator(); outer.hasNext();) {
        outer.advance();
        for (TIntFloatIterator inner = outer.value().iterator(); inner.hasNext();) {
            inner.advance();
            inner.setValue(inner.value() / totalword);
        }
    }
}
/**
 * Looks up the value stored in {@code pcc} for the ordered pair
 * {@code (c1, c2)}.
 *
 * @param c1 id selecting the row of {@code pcc}
 * @param c2 id selecting the entry inside that row
 * @return 0 when {@code pcc} has no row for {@code c1}; otherwise whatever the
 *         row returns for {@code c2} (the map's no-entry value if it is absent)
 */
private float getProb(int c1, int c2) {
    final TIntFloatHashMap row = pcc.get(c1);
    return (row == null) ? 0f : row.get(c2);
}