/**
 * Reads one partition of a sparse float model from the stream into the given model.
 *
 * <p>Stream layout as consumed here: an int row count, then for every row an int row id,
 * an int number of stored entries, and that many (int column, float value) pairs.
 *
 * @param model    model that supplies (and owns) the per-row maps being filled
 * @param input    stream positioned at the start of the partition data
 * @param partMeta partition meta, used for the partition id and its column range
 * @throws IOException if reading from the stream fails
 */
private static void loadSparseFloatPartition(SparseFloatModel model, FSDataInputStream input,
        ModelPartitionMeta partMeta) throws IOException {
    final int rowCount = input.readInt();
    for (int r = 0; r < rowCount; r++) {
        final int rowId = input.readInt();
        final int nnz = input.readInt();
        // Scale the partition-local entry count up to the whole column range; the result is
        // only used as a size estimate for the row map.
        final int estimatedTotal =
                (int) (nnz * (model.col) / (partMeta.getEndCol() - partMeta.getStartCol()));
        final Int2FloatOpenHashMap target = model.getRow(rowId, partMeta.getPartId(), estimatedTotal);
        for (int k = 0; k < nnz; k++) {
            final int column = input.readInt();
            final float value = input.readFloat();
            target.put(column, value);
        }
    }
}
/** * Load dense double model to int->float maps * * @param modelDir model save directory path * @return model data */ public static Int2FloatOpenHashMap[] loadToFloatMaps(String modelDir, Configuration conf) throws IOException { // Load model meta ModelFilesMeta meta = getMeta(modelDir, conf); // Check row type if (meta.getRowType() != SPARSE_FLOAT) { throw new IOException("model row type is not sparse float, you should check it"); } // Load model SparseFloatModel model = new SparseFloatModel(meta.getRow(), meta.getCol()); loadModel(modelDir, model, meta, conf); return model.getModel(); }
/**
 * Loads a sparse float model and writes it out row by row through the line converter.
 *
 * <p>Rows that are {@code null} are skipped. For every other row the row index is written
 * first, followed by its (index, value) entries after sorting them with
 * {@code Sort.quickSort}.
 *
 * @param conf          configuration used to load the model
 * @param output        stream the converted rows are written to
 * @param modelInputDir directory the model is loaded from
 * @param lineConvert   converter that formats row indexes and entries
 * @throws IOException if loading or writing fails
 */
private static void convertSparseFloatModel(Configuration conf, FSDataOutputStream output,
        String modelInputDir, ModelLineConvert lineConvert) throws IOException {
    final Int2FloatOpenHashMap[] rows = ModelLoader.loadToFloatMaps(modelInputDir, conf);
    for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) {
        Int2FloatOpenHashMap current = rows[rowIndex];
        // Drop the array reference right away so the row can be reclaimed once it is written.
        rows[rowIndex] = null;
        if (current != null) {
            lineConvert.convertRowIndex(output, rowIndex);

            final int[] columns = current.keySet().toIntArray();
            final float[] weights = current.values().toFloatArray();
            current = null;

            Sort.quickSort(columns, weights, 0, columns.length - 1);
            for (int k = 0; k < columns.length; k++) {
                lineConvert.convertFloat(output, columns[k], weights[k]);
            }
        }
    }
}
/**
 * Test of sortByValue method, of class Utils.
 *
 * <p>Builds a five-entry map and checks the key order returned by
 * {@code Utils.sortByValue} for both values of its boolean flag: with {@code true} the
 * keys are expected in order of decreasing value, with {@code false} in order of
 * increasing value.
 */
@org.junit.Test
public void testSortByValue() {
    System.out.println("sortByValue");
    Int2FloatOpenHashMap map = new Int2FloatOpenHashMap();
    map.put(3, 0.5f);
    map.put(2, 0.6f);
    map.put(4, 0.4f);
    map.put(1, 0.8f);
    map.put(5, 0f);

    Integer[] arrayResult = new Integer[map.size()];
    arrayResult = Utils.sortByValue(map, true).keySet().toArray(arrayResult);
    Integer[] correctResult = new Integer[]{1, 2, 3, 4, 5};
    // Expected value goes first so that a failure message labels the two arrays correctly.
    assertArrayEquals(correctResult, arrayResult);

    arrayResult = Utils.sortByValue(map, false).keySet().toArray(arrayResult);
    correctResult = new Integer[]{5, 4, 3, 2, 1};
    assertArrayEquals(correctResult, arrayResult);
}
/**
 * Decodes one training record from the buffer and runs {@code train} for every item that
 * occurs in it.
 *
 * <p>Record layout as consumed here: int item id, int number of users, then per user an
 * int user id, an int entry count and that many (int item, float value) pairs.
 *
 * @param buf buffer positioned at the start of a record
 */
private void replayTrain(@Nonnull final ByteBuffer buf) {
    final int itemI = buf.getInt();
    final int userCount = buf.getInt();

    final Int2ObjectMap<Int2FloatMap> knnItems = new Int2ObjectOpenHashMap<>(1024);
    final IntSet seenItems = new IntOpenHashSet();

    for (int u = 0; u < userCount; u++) {
        final int user = buf.getInt();
        final int entryCount = buf.getInt();

        final Int2FloatMap ratings = new Int2FloatOpenHashMap(entryCount);
        ratings.defaultReturnValue(0.f);
        for (int remaining = entryCount; remaining > 0; remaining--) {
            final int itemK = buf.getInt();
            final float ruk = buf.getFloat();
            seenItems.add(itemK);
            ratings.put(itemK, ruk);
        }
        knnItems.put(user, ratings);
    }

    // Train once for each distinct item collected above.
    for (int itemJ : seenItems) {
        train(itemI, knnItems, itemJ);
    }
}
/**
 * Converts a generic map into an int->float map using the given object inspectors.
 *
 * <p>Entries whose value converts to {@code 0.f} are left out; the result returns
 * {@code 0.f} for missing keys.
 *
 * @param map     source map
 * @param keyOI   inspector used to read keys as int
 * @param valueOI inspector used to read values as float
 * @return the converted map
 */
@Nonnull
private static Int2FloatMap int2floatMap(@Nonnull final Map<?, ?> map,
        @Nonnull final PrimitiveObjectInspector keyOI,
        @Nonnull final PrimitiveObjectInspector valueOI) {
    final Int2FloatMap converted = new Int2FloatOpenHashMap(map.size());
    converted.defaultReturnValue(0.f);

    for (Map.Entry<?, ?> e : map.entrySet()) {
        final float value = PrimitiveObjectInspectorUtils.getFloat(e.getValue(), valueOI);
        if (value != 0.f) {
            // The key is only decoded for entries that are actually kept.
            final int key = PrimitiveObjectInspectorUtils.getInt(e.getKey(), keyOI);
            converted.put(key, value);
        }
    }
    return converted;
}
/**
 * Creates the store and pre-allocates one int->float map per partition known to the
 * service's partition store, sized by the partition's vertex count.
 *
 * @param service  Service worker
 * @param combiner Message combiner
 */
public IntFloatMessageStore(
        CentralizedServiceWorker<IntWritable, ?, ?> service,
        Combiner<IntWritable, FloatWritable> combiner) {
    this.service = service;
    this.combiner = combiner;
    this.map = new Int2ObjectOpenHashMap<Int2FloatOpenHashMap>();

    for (int id : service.getPartitionStore().getPartitionIds()) {
        // The vertex count is narrowed to int because the map constructor takes an int size.
        final int expectedSize =
                (int) service.getPartitionStore().getPartition(id).getVertexCount();
        this.map.put(id, new Int2FloatOpenHashMap(expectedSize));
    }
}
/**
 * Get the map for a (row, partition) pair, creating any missing level on first access.
 *
 * <p>The whole lookup-or-create sequence runs while holding this object's monitor.
 *
 * @param rowId  row index
 * @param partId partition index
 * @return model row part for the given row and partition
 */
public Int2FloatOpenHashMap getRow(int rowId, int partId) {
    synchronized (this) {
        // First level: make sure the row has a partition table.
        if (tempModel.get(rowId) == null) {
            tempModel.put(rowId, new HashMap<>());
        }
        // Second level: make sure the partition has a map.
        if (tempModel.get(rowId).get(partId) == null) {
            tempModel.get(rowId).put(partId, new Int2FloatOpenHashMap());
        }
        return tempModel.get(rowId).get(partId);
    }
}
/**
 * Get the map for a (row, partition) pair, creating any missing level on first access.
 *
 * <p>The whole lookup-or-create sequence runs while holding this object's monitor.
 *
 * @param rowId  row index
 * @param partId partition index
 * @param nnz    estimated non-zero elements number; used as the expected size when a new
 *               map has to be created, ignored when the map already exists
 * @return model row part for the given row and partition
 */
public Int2FloatOpenHashMap getRow(int rowId, int partId, int nnz) {
    synchronized (this) {
        // First level: make sure the row has a partition table.
        if (tempModel.get(rowId) == null) {
            tempModel.put(rowId, new HashMap<>());
        }
        // Second level: make sure the partition has a map.
        if (tempModel.get(rowId).get(partId) == null) {
            tempModel.get(rowId).put(partId, new Int2FloatOpenHashMap(nnz));
        }
        return tempModel.get(rowId).get(partId);
    }
}
/**
 * Reads a single sparse float row from a partition file.
 *
 * <p>Seeks to the offset recorded for the row in the partition meta, checks that the row
 * id stored at that position matches, then reads an int entry count followed by that many
 * (int index, float value) pairs.
 *
 * @param input    stream over the partition file
 * @param partMeta partition meta holding the per-row offsets
 * @param rowId    row index to load
 * @return the row as an int->float map
 * @throws IOException if seeking or reading fails
 * @throws IllegalStateException if the row id found at the offset differs from {@code rowId}
 */
public static Int2FloatOpenHashMap loadSparseFloatRowFromPartition(FSDataInputStream input,
        ModelPartitionMeta partMeta, int rowId) throws IOException {
    final RowOffset location = partMeta.getRowMetas().get(rowId);
    input.seek(location.getOffset());

    final int storedRowId = input.readInt();
    Preconditions.checkState(storedRowId == rowId);

    final Int2FloatOpenHashMap result = new Int2FloatOpenHashMap();
    for (int remaining = input.readInt(); remaining > 0; remaining--) {
        final int index = input.readInt();
        final float value = input.readFloat();
        result.put(index, value);
    }
    return result;
}
/**
 * Init the vector with a dimension and an initial map capacity.
 *
 * @param dim      vector dimension
 * @param capacity map capacity; {@code INIT_SIZE} is used instead when this is not positive
 */
public SparseFloatVector(int dim, int capacity) {
    super();
    this.hashMap = new Int2FloatOpenHashMap(capacity > 0 ? capacity : INIT_SIZE);
    this.dim = dim;
}
/**
 * Restores this vector from the buffer.
 *
 * <p>Reads an int dimension, an int entry count and then that many (int index, float
 * value) pairs; the vector's dimension and backing map are replaced once everything has
 * been read.
 *
 * @param buf buffer to read from
 */
@Override
public void deserialize(ByteBuf buf) {
    final int newDim = buf.readInt();
    final int entryCount = buf.readInt();

    final Int2FloatOpenHashMap restored = new Int2FloatOpenHashMap(entryCount);
    for (int i = 0; i < entryCount; i++) {
        final int index = buf.readInt();
        final float value = buf.readFloat();
        restored.put(index, value);
    }

    this.dim = newDim;
    this.hashMap = restored;
}
/**
 * Merge this sparse float vector split to a map.
 *
 * <p>All entries of this split are copied into the given map while the read lock is held.
 *
 * @param indexToValueMap a index->value map that receives the entries
 */
public void mergeTo(Int2FloatOpenHashMap indexToValueMap) {
    // Acquire the lock before entering the try block: if lock() itself fails, the finally
    // clause must not try to release a lock that was never obtained.
    lock.readLock().lock();
    try {
        indexToValueMap.putAll(hashMap);
    } finally {
        lock.readLock().unlock();
    }
}
/** * @deprecated Not needed in the new implementation. Kept, in case it will be useful in the future. * Returns the neighborSim of each entity pair. * @param valueSims an RDD with the top-K value sims for each entity, in the form key: eid, value: (candidateMatch cId, value_sim(eId,cId)) * @param inNeighbors_BV * @return the neighborSim of each entity pair, where entity pair is the key and neighborSim is the value */ private JavaPairRDD<Tuple2<Integer, Integer>, Float> getNeighborSims(JavaPairRDD<Integer,Int2FloatOpenHashMap> valueSims, Broadcast<Map<Integer,IntArrayList>> inNeighbors_BV) { return valueSims.flatMapToPair(x->{ int eId = x._1(); IntArrayList eInNeighbors = inNeighbors_BV.value().get(eId); List<Tuple2<Tuple2<Integer,Integer>, Float>> partialNeighborSims = new ArrayList<>(); //key: (negativeEid, positiveEid), value: valueSim(outNeighbor(nEid),outNeighbor(pEid)) if (eInNeighbors == null) { return partialNeighborSims.iterator(); //empty } for (Map.Entry<Integer, Float> eIdValueCandidates : x._2().entrySet()) { IntArrayList inNeighborsOfCandidate = inNeighbors_BV.value().get(eIdValueCandidates.getKey()); if (inNeighborsOfCandidate == null) { continue; //go to next candidate match. this one does not have in-neighbors } Float tmpNeighborSim = eIdValueCandidates.getValue(); for (Integer inNeighborOfCandidate : inNeighborsOfCandidate) { //for each in-neighbor of the candidate match of the current entity for (Integer eInNeighbor : eInNeighbors) { //for each in-neighbor of the current entity if (eId < 0) partialNeighborSims.add(new Tuple2<>(new Tuple2<>(eInNeighbor, inNeighborOfCandidate), tmpNeighborSim)); else partialNeighborSims.add(new Tuple2<>(new Tuple2<>(inNeighborOfCandidate, eInNeighbor), tmpNeighborSim)); } } } return partialNeighborSims.iterator(); }) .reduceByKey((w1, w2) -> Math.max(w1, w2)); //for each entity pair, neighborSim = max value sim of its pairs of out-neighbors }
/**
 * Creates a model with empty {@code _w} and {@code _V} maps.
 *
 * <p>{@code _w0} and both index bounds start at zero; {@code _w} returns {@code 0.f} for
 * keys that are not present.
 *
 * @param params hyper parameters handed to the superclass
 */
public FMIntFeatureMapModel(@Nonnull FMHyperParameters params) {
    super(params);

    this._w0 = 0.f;
    this._minIndex = 0;
    this._maxIndex = 0;

    this._V = new Int2ObjectOpenHashMap<float[]>(DEFAULT_MAPSIZE);
    this._w = new Int2FloatOpenHashMap(DEFAULT_MAPSIZE);
    this._w.defaultReturnValue(0.f);
}
/**
 * Resets {@code _w0} to zero and replaces {@code _w1}, {@code _w2} and {@code _w3} with
 * fresh int->float maps that return {@code 0.f} for missing keys.
 *
 * @return always {@code null}
 */
@Override
protected PredictionModel createModel() {
    final int expectedSize = 16384;

    this._w0 = 0.f;

    this._w1 = new Int2FloatOpenHashMap(expectedSize);
    this._w1.defaultReturnValue(0.f);

    this._w2 = new Int2FloatOpenHashMap(expectedSize);
    this._w2.defaultReturnValue(0.f);

    this._w3 = new Int2FloatOpenHashMap(expectedSize);
    this._w3.defaultReturnValue(0.f);

    return null;
}
/** * Create a SparseFloatVector consisting of double values according to the * specified mapping of indices and values. * * @param values the values to be set as values of the real vector * @param dimensionality the dimensionality of this feature vector * @throws IllegalArgumentException if the given dimensionality is too small * to cover the given values (i.e., the maximum index of any value not * zero is bigger than the given dimensionality) */ public SparseFloatVector(Int2FloatOpenHashMap values, int dimensionality) throws IllegalArgumentException { if(values.size() > dimensionality) { throw new IllegalArgumentException("values.size() > dimensionality!"); } this.indexes = new int[values.size()]; this.values = new float[values.size()]; // Import and sort the indexes { ObjectIterator<Int2FloatMap.Entry> iter = values.int2FloatEntrySet().fastIterator(); for(int i = 0; iter.hasNext(); i++) { this.indexes[i] = iter.next().getIntKey(); } Arrays.sort(this.indexes); } // Import the values accordingly { for(int i = 0; i < values.size(); i++) { this.values[i] = values.get(this.indexes[i]); } } this.dimensionality = dimensionality; final int maxdim = getMaxDim(); if(maxdim > dimensionality) { throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ")."); } }
/**
 * Adds a batch of messages to the map of the given partition.
 *
 * <p>When a vertex already has a stored message, the stored and the incoming message are
 * merged through the combiner and the combined value is stored; otherwise the incoming
 * message is stored as is. The whole batch is applied while holding the partition map's
 * monitor.
 *
 * @param partitionId id of the partition the messages belong to
 * @param messages    vertex id / message pairs to add
 * @throws IOException declared by the overridden method
 */
@Override
public void addPartitionMessages(int partitionId,
        ByteArrayVertexIdMessages<IntWritable, FloatWritable> messages) throws IOException {
    // Writable holders reused for every combiner call in this batch.
    final IntWritable idHolder = new IntWritable();
    final FloatWritable incomingHolder = new FloatWritable();
    final FloatWritable storedHolder = new FloatWritable();

    final Int2FloatOpenHashMap partitionMap = map.get(partitionId);
    synchronized (partitionMap) {
        for (ByteArrayVertexIdMessages<IntWritable, FloatWritable>.VertexIdMessageIterator it =
                messages.getVertexIdMessageIterator(); it.hasNext();) {
            it.next();
            final int vertexId = it.getCurrentVertexId().get();
            float toStore = it.getCurrentMessage().get();

            if (partitionMap.containsKey(vertexId)) {
                idHolder.set(vertexId);
                incomingHolder.set(toStore);
                storedHolder.set(partitionMap.get(vertexId));
                combiner.combine(idHolder, storedHolder, incomingHolder);
                // The combined result is read back from the holder of the stored message.
                toStore = storedHolder.get();
            }
            partitionMap.put(vertexId, toStore);
        }
    }
}
/**
 * Returns the message stored for a vertex.
 *
 * @param vertexId id of the vertex
 * @return a single-element iterable holding the stored message, or an empty iterable when
 *         the vertex has no message
 * @throws IOException declared by the overridden method
 */
@Override
public Iterable<FloatWritable> getVertexMessages(
        IntWritable vertexId) throws IOException {
    final Int2FloatOpenHashMap partitionMap = getPartitionMap(vertexId);
    final int id = vertexId.get();
    if (partitionMap.containsKey(id)) {
        return Collections.singleton(new FloatWritable(partitionMap.get(id)));
    }
    return EmptyIterable.get();
}
/**
 * Lists the ids of all vertices that have a message stored in the given partition.
 *
 * @param partitionId id of the partition
 * @return a newly built list with one {@code IntWritable} per key of the partition map
 */
@Override
public Iterable<IntWritable> getPartitionDestinationVertices(
        int partitionId) {
    final Int2FloatOpenHashMap partitionMap = map.get(partitionId);
    final List<IntWritable> destinations =
            Lists.newArrayListWithCapacity(partitionMap.size());
    for (IntIterator ids = partitionMap.keySet().iterator(); ids.hasNext();) {
        destinations.add(new IntWritable(ids.nextInt()));
    }
    return destinations;
}
/**
 * Writes the messages of one partition.
 *
 * <p>Output layout: an int entry count followed by one (int vertex id, float message)
 * pair per entry.
 *
 * @param out         output to write to
 * @param partitionId id of the partition to write
 * @throws IOException if writing fails
 */
@Override
public void writePartition(DataOutput out, int partitionId) throws IOException {
    final Int2FloatOpenHashMap partitionMap = map.get(partitionId);
    out.writeInt(partitionMap.size());
    for (ObjectIterator<Int2FloatMap.Entry> entries =
            partitionMap.int2FloatEntrySet().fastIterator(); entries.hasNext();) {
        final Int2FloatMap.Entry current = entries.next();
        out.writeInt(current.getIntKey());
        out.writeFloat(current.getFloatValue());
    }
}
/**
 * Reads the messages of one partition and installs them under the given partition id.
 *
 * <p>Input layout: an int entry count followed by that many (int vertex id, float
 * message) pairs. The freshly read map is stored while holding the monitor of the
 * partition table.
 *
 * @param in          input to read from
 * @param partitionId id of the partition being read
 * @throws IOException if reading fails
 */
@Override
public void readFieldsForPartition(DataInput in, int partitionId) throws IOException {
    final int entryCount = in.readInt();
    final Int2FloatOpenHashMap loaded = new Int2FloatOpenHashMap(entryCount);
    for (int i = 0; i < entryCount; i++) {
        final int vertexId = in.readInt();
        final float message = in.readFloat();
        loaded.put(vertexId, message);
    }
    synchronized (map) {
        map.put(partitionId, loaded);
    }
}
public static void main(String[] args) throws IOException { final Configuration conf = new Configuration(); // load hadoop configuration String hadoopHomePath = System.getenv("HADOOP_HOME"); if (hadoopHomePath == null) { LOG.warn("HADOOP_HOME is empty."); } else { conf.addResource(new Path(hadoopHomePath + "/etc/hadoop/yarn-site.xml")); conf.addResource(new Path(hadoopHomePath + "/etc/hadoop/hdfs-site.xml")); } String baseDir = "out"; String denseDoubleModelPath = baseDir + "/dense_double"; String sparseDoubleModelPath = baseDir + "/sparse_double"; String denseFloatModelPath = baseDir + "/dense_float"; String sparseFloatModelPath = baseDir + "/sparse_float"; String denseIntModelPath = baseDir + "/dense_int"; String sparseIntModelPath = baseDir + "/sparse_int"; String sparseDoubleLongKeyModelPath = baseDir + "/sparse_double_longkey"; double[][] denseDoubleModel = loadToDoubleArrays(denseDoubleModelPath, conf); int size = denseDoubleModel.length; for (int i = 0; i < size; i++) { LOG.info("model dense_double row " + i + " sum is " + sum(denseDoubleModel[i])); } denseDoubleModel = null; Int2DoubleOpenHashMap[] sparseDoubleModel = loadToDoubleMaps(sparseDoubleModelPath, conf); size = sparseDoubleModel.length; for (int i = 0; i < size; i++) { LOG.info("model sparse_double row " + i + " sum is " + sum(sparseDoubleModel[i])); } sparseDoubleModel = null; float[][] denseFloatModel = loadToFloatArrays(denseFloatModelPath, conf); size = denseFloatModel.length; for (int i = 0; i < size; i++) { LOG.info("model dense_float row " + i + " sum is " + sum(denseFloatModel[i])); } denseFloatModel = null; Int2FloatOpenHashMap[] sparseFloatModel = loadToFloatMaps(sparseFloatModelPath, conf); size = sparseFloatModel.length; for (int i = 0; i < size; i++) { LOG.info("model sparse_float row " + i + " sum is " + sum(sparseFloatModel[i])); } sparseFloatModel = null; int[][] denseIntModel = loadToIntArrays(denseIntModelPath, conf); size = denseIntModel.length; for (int i = 0; i < size; i++) { 
LOG.info("model dense_int row " + i + " sum is " + sum(denseIntModel[i])); } denseIntModel = null; Int2IntOpenHashMap[] sparseIntModel = loadToIntMaps(sparseIntModelPath, conf); size = sparseIntModel.length; for (int i = 0; i < size; i++) { LOG.info("model sparse_int row " + i + " sum is " + sum(sparseIntModel[i])); } sparseIntModel = null; Long2DoubleOpenHashMap[] sparseDoubleLongKeyModel = loadToDoubleLongKeyMaps( sparseDoubleLongKeyModelPath, conf); size = sparseDoubleLongKeyModel.length; for (int i = 0; i < size; i++) { LOG.info( "model sparse_double_longkey row " + i + " sum is " + sum(sparseDoubleLongKeyModel[i])); } sparseDoubleLongKeyModel = null; }
/**
 * Init an empty vector whose backing map is created with {@code INIT_SIZE} as its
 * expected size.
 */
public SparseFloatVector() {
    super();
    hashMap = new Int2FloatOpenHashMap(INIT_SIZE);
}
/**
 * Get the backing index->value map.
 *
 * @return the internal map itself, not a copy
 */
public Int2FloatOpenHashMap getIndexToValueMap() {
    return this.hashMap;
}
/**
 * Get the data held by this object.
 *
 * @return the internal int->float map itself, not a copy
 */
public Int2FloatOpenHashMap getData() {
    return this.hashMap;
}
/** * * @param blocksFromEI * @param totalWeightsBV * @param K * @param numNegativeEntities * @param numPositiveEntities * @return key: an entityId, value: a list of pairs of candidate matches along with their value_sim with the key */ public JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> getTopKValueSims(JavaPairRDD<Integer, IntArrayList> blocksFromEI, int K, long numNegativeEntities, long numPositiveEntities) { //key: an entityId, value: a list of candidate matches, with first number being the number of entities from the same collection in this block JavaPairRDD<Integer, IntArrayList> mapOutput = CNPMapPhase.getMapOutputWJS(blocksFromEI); //reduce phase //metaBlockingResults: key: a negative entityId, value: a list of candidate matches (positive entity ids) along with their value_sim with the key return mapOutput .groupByKey() //for each entity create an iterable of arrays of candidate matches (one array from each common block) .mapToPair(x -> { int entityId = x._1(); //compute the numerators Int2FloatOpenHashMap counters = new Int2FloatOpenHashMap(); //number of common blocks with current entity per candidate match for(IntArrayList candidates : x._2()) { int numNegativeEntitiesInBlock = candidates.getInt(0); //the first element is the number of entities from the same collection int numPositiveEntitiesInBlock = candidates.size()-1; //all the other candidates are positive entity ids if (entityId >= 0) { numPositiveEntitiesInBlock = candidates.getInt(0); numNegativeEntitiesInBlock = candidates.size()-1; } float weight1 = (float) Math.log10((double)numNegativeEntities/numNegativeEntitiesInBlock); float weight2 = (float) Math.log10((double)numPositiveEntities/numPositiveEntitiesInBlock); candidates = new IntArrayList(candidates.subList(1, candidates.size())); //remove the first element which is the number of entities in this block from the same collection as the entityId for (int candidateId : candidates) { counters.addTo(candidateId, weight1+weight2); } } //keep 
the top-K weights Int2FloatLinkedOpenHashMap weights = new Int2FloatLinkedOpenHashMap(Utils.sortByValue(counters, true)); Int2FloatLinkedOpenHashMap weightsToEmit = new Int2FloatLinkedOpenHashMap(); int i = 0; for (Map.Entry<Integer, Float> neighbor : weights.entrySet()) { if (i == weights.size() || i == K) { break; } weightsToEmit.put(neighbor.getKey().intValue(), weights.get(neighbor.getKey().intValue())); i++; } return new Tuple2<>(entityId, weightsToEmit); }) .filter(x-> !x._2().isEmpty()); }
/**
 * Creates a graph with a node-label array of the given length and an empty label-score
 * map.
 *
 * @param arraySize length of the node-label array
 */
public ArrayBackedGraphCW(int arraySize) {
    nodeLabels = new Integer[arraySize];
    labelScores = new Int2FloatOpenHashMap();
}
/**
 * Creates a graph with a node-label array of the given length and an empty label-score
 * map.
 *
 * @param arraySize length of the node-label array
 */
public ArrayBackedGraphCW(int arraySize) {
    this.nodeLabels = new Integer[arraySize];
    this.labelScores = new Int2FloatOpenHashMap();
}
public static Int2FloatMap int2FloatMap() { return new Int2FloatOpenHashMap(); // return new Int2FloatAVLTreeMap(); }
/** * Create a new vector with default size. */ public FloatVector() { initialize(Int2FloatOpenHashMap.DEFAULT_INITIAL_SIZE); }
/**
 * Init the vector from a dimension and an existing map.
 *
 * <p>The map is stored by reference; no copy is made.
 *
 * @param dim vector dimension
 * @param map a (int, float) map
 */
public SparseFloatVector(int dim, Int2FloatOpenHashMap map) {
    super();
    this.hashMap = map;
    this.dim = dim;
}
/** * init the vector by another vector * * @param other other vector which has same dimension */ public SparseFloatVector(SparseFloatVector other) { super(other); this.hashMap = new Int2FloatOpenHashMap(other.hashMap); }
/**
 * Create a ServerSparseFloatRow backed by a new, empty int->float map.
 *
 * @param rowId    row index
 * @param startCol partition start column index
 * @param endCol   partition end column index
 */
public ServerSparseFloatRow(int rowId, int startCol, int endCol) {
    super(rowId, startCol, endCol);
    this.hashMap = new Int2FloatOpenHashMap();
}
/**
 * Get the message map of the partition a vertex belongs to.
 *
 * <p>The partition is resolved by asking the service for the vertex's partition id.
 *
 * @param vertexId Id of the vertex
 * @return Map which holds messages for partition which vertex belongs to.
 */
private Int2FloatOpenHashMap getPartitionMap(IntWritable vertexId) {
    return this.map.get(this.service.getPartitionId(vertexId));
}
/**
 * Initialize the values of the vector by replacing the entry map with a new, empty one.
 * The default value is 0.0
 *
 * @param size the size handed to the map constructor
 */
private void initialize(int size) {
    this.entries = new Int2FloatOpenHashMap(size);
    this.entries.defaultReturnValue(0.0f);
}