You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@madlib.apache.org by "Frank McQuillan (JIRA)" <ji...@apache.org> on 2018/10/01 19:51:00 UTC

[jira] [Created] (MADLIB-1279) Missing node for graph in-out degrees

Frank McQuillan created MADLIB-1279:
---------------------------------------

             Summary: Missing node for graph in-out degrees
                 Key: MADLIB-1279
                 URL: https://issues.apache.org/jira/browse/MADLIB-1279
             Project: Apache MADlib
          Issue Type: Bug
          Components: Module: Graph
            Reporter: Frank McQuillan
             Fix For: v1.15.1


MADlib seems to consider only destination-side vertices.
If a vertex appears only on the source side of edges and never on the destination side,
then that vertex's id is missing from the 'In-Out Degree' result,
with or without 'Grouping Columns'.

I've changed the 'edge' table example from the user docs to show the bug
(the edge (6, 7, 1.0) becomes (7, 6, 1.0)).
Vertex '7' should then have outdegree '1' and indegree '0'.
However, madlib.graph_vertex_degrees(), with or without a grouping column, produces an empty id for vertex '7'.

Create table:
{code}
DROP TABLE IF EXISTS vertex, edge;

CREATE TABLE vertex(
        id INTEGER,
        name TEXT
        );

CREATE TABLE edge(
        src_id INTEGER,
        dest_id INTEGER,
        edge_weight FLOAT8
        );

INSERT INTO vertex VALUES
(0, 'A'),
(1, 'B'),
(2, 'C'),
(3, 'D'),
(4, 'E'),
(5, 'F'),
(6, 'G'),
(7, 'H');

INSERT INTO edge VALUES
(0, 1, 1.0),
(0, 2, 1.0),
(0, 4, 10.0),
(1, 2, 2.0),
(1, 3, 10.0),
(2, 3, 1.0),
(2, 5, 1.0),
(2, 6, 3.0),
(3, 0, 1.0),
(4, 0, -2.0),
(5, 6, 1.0),
(7, 6, 1.0);

SELECT * FROM edge ORDER BY src_id, dest_id;
{code}
{code}
 src_id | dest_id | edge_weight 
--------+---------+-------------
      0 |       1 |           1
      0 |       2 |           1
      0 |       4 |          10
      1 |       2 |           2
      1 |       3 |          10
      2 |       3 |           1
      2 |       5 |           1
      2 |       6 |           3
      3 |       0 |           1
      4 |       0 |          -2
      5 |       6 |           1
      7 |       6 |           1
(12 rows)
{code}

Create table with grouping:
{code}
DROP TABLE IF EXISTS edge_gr;

CREATE TABLE edge_gr AS
(
  SELECT *, 0 AS grp FROM edge
  UNION
  SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
);

INSERT INTO edge_gr VALUES
(4,5,-20,1);

SELECT * FROM edge_gr ORDER BY grp, src_id, dest_id;
{code}
{code}
 src_id | dest_id | edge_weight | grp 
--------+---------+-------------+-----
      0 |       1 |           1 |   0
      0 |       2 |           1 |   0
      0 |       4 |          10 |   0
      1 |       2 |           2 |   0
      1 |       3 |          10 |   0
      2 |       3 |           1 |   0
      2 |       5 |           1 |   0
      2 |       6 |           3 |   0
      3 |       0 |           1 |   0
      4 |       0 |          -2 |   0
      5 |       6 |           1 |   0
      7 |       6 |           1 |   0
      0 |       1 |           1 |   1
      0 |       2 |           1 |   1
      0 |       4 |          10 |   1
      1 |       2 |           2 |   1
      1 |       3 |          10 |   1
      2 |       3 |           1 |   1
      2 |       5 |           1 |   1
      3 |       0 |           1 |   1
      4 |       0 |          -2 |   1
      4 |       5 |         -20 |   1
(22 rows)
{code}

In-out degrees:
{code}
DROP TABLE IF EXISTS degrees;

SELECT madlib.graph_vertex_degrees(
    'vertex',      -- Vertex table
    'id',          -- Vertex id column (NULL means use default naming)
    'edge',        -- Edge table
    'src=src_id, dest=dest_id, weight=edge_weight',
    'degrees');    -- Output table of in-out degrees

SELECT * FROM degrees ORDER BY id;
{code}
produces
{code}
 id | indegree | outdegree 
----+----------+-----------
  0 |        2 |         3
  1 |        1 |         2
  2 |        2 |         3
  3 |        2 |         1
  4 |        1 |         1
  5 |        1 |         1
  6 |        3 |         0
    |        0 |         1
(8 rows)
{code}
where id=7 is missing.

Likewise with grouping:
{code}
DROP TABLE IF EXISTS out_gr;

SELECT madlib.graph_vertex_degrees(
    'vertex',      -- Vertex table
    NULL,          -- Vertex id column (NULL means use default naming)
    'edge_gr',     -- Edge table
    'src=src_id, dest=dest_id, weight=edge_weight',
    'out_gr',      -- Output table of in-out degrees
    'grp'          -- Grouping columns
);

SELECT * FROM out_gr ORDER BY grp, id;
{code}
produces
{code}
 grp | id | indegree | outdegree 
-----+----+----------+-----------
   0 |  0 |        2 |         3
   0 |  1 |        1 |         2
   0 |  2 |        2 |         3
   0 |  3 |        2 |         1
   0 |  4 |        1 |         1
   0 |  5 |        1 |         1
   0 |  6 |        3 |         0
   0 |    |        0 |         1
   1 |  0 |        2 |         3
   1 |  1 |        1 |         2
   1 |  2 |        2 |         2
   1 |  3 |        2 |         1
   1 |  4 |        1 |         2
   1 |  5 |        2 |         0
(14 rows)
{code}
where id=7 is missing.






--
This message was sent by Atlassian JIRA
(v7.6.3#76005)