{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from graphframes import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read the trip CSV through the spark-csv data source: the header row supplies\n",
    "# the column names and the column types are inferred from the data.\n",
    "trip_reader = sqlContext.read.format(\"com.databricks.spark.csv\")\n",
    "trip_reader = trip_reader.options(header='true', inferschema='true')\n",
    "trip = trip_reader.load(\"file:///home/cloudera/trip.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read the station CSV the same way as the trips: spark-csv source, header row\n",
    "# as column names, inferred column types.\n",
    "station_reader = sqlContext.read.format(\"com.databricks.spark.csv\")\n",
    "station_reader = station_reader.options(header='true', inferschema='true')\n",
    "station = station_reader.load(\"file:///home/cloudera/station.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DataFrame[Trip ID: int, Duration: int, Start Date: string, Start Station: string, Start Terminal: int, End Date: string, End Station: string, End Terminal: int, Bike #: int, Subscriber Type: string, Zip Code: string]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Make the trips queryable from SQL under the table name \"trip\".\n",
    "trip.registerTempTable(name=\"trip\")\n",
    "# cache() is the last expression of the cell, so the DataFrame it returns is\n",
    "# what the notebook displays.\n",
    "trip.cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DataFrame[station_id: int, name: string, lat: double, long: double, dockcount: int, landmark: string, installation: string]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Make the stations queryable from SQL under the table name \"station\".\n",
    "station.registerTempTable(name=\"station\")\n",
    "# cache() is the last expression of the cell, so the DataFrame it returns is\n",
    "# what the notebook displays.\n",
    "station.cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+--------------------+---------+-----------+---------+--------+------------+\n",
      "|station_id|                name|      lat|       long|dockcount|landmark|installation|\n",
      "+----------+--------------------+---------+-----------+---------+--------+------------+\n",
      "|         2|San Jose Diridon ...|37.329732|-121.901782|       27|San Jose|    8/6/2013|\n",
      "|         3|San Jose Civic Ce...|37.330698|-121.888979|       15|San Jose|    8/5/2013|\n",
      "|         4|Santa Clara at Al...|37.333988|-121.894902|       11|San Jose|    8/6/2013|\n",
      "|         5|    Adobe on Almaden|37.331415|  -121.8932|       19|San Jose|    8/5/2013|\n",
      "+----------+--------------------+---------+-----------+---------+--------+------------+\n",
      "only showing top 4 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Peek at the first four rows of the registered \"station\" table.\n",
    "station_rows = sqlContext.sql(\"SELECT * from station\")\n",
    "station_rows.show(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n",
      "|Trip ID|Duration|     Start Date|       Start Station|Start Terminal|       End Date|         End Station|End Terminal|Bike #|Subscriber Type|Zip Code|\n",
      "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n",
      "| 913460|     765|8/31/2015 23:26|Harry Bridges Pla...|            50|8/31/2015 23:39|San Francisco Cal...|          70|   288|     Subscriber|    2139|\n",
      "| 913459|    1036|8/31/2015 23:11|San Antonio Shopp...|            31|8/31/2015 23:28|Mountain View Cit...|          27|    35|     Subscriber|   95032|\n",
      "| 913455|     307|8/31/2015 23:13|      Post at Kearny|            47|8/31/2015 23:18|   2nd at South Park|          64|   468|     Subscriber|   94107|\n",
      "| 913454|     409|8/31/2015 23:10|  San Jose City Hall|            10|8/31/2015 23:17| San Salvador at 1st|           8|    68|     Subscriber|   95113|\n",
      "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n",
      "only showing top 4 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Peek at the first four rows of the registered \"trip\" table.\n",
    "trip_rows = sqlContext.sql(\"SELECT * from trip\")\n",
    "trip_rows.show(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# All-in-one version of the analysis: build the station/trip graph, inspect\n",
    "# degrees, motifs and the busiest routes, then run PageRank.\n",
    "# Only the names actually used are imported; a wildcard import of\n",
    "# pyspark.sql.functions would shadow Python builtins such as sum/min/max.\n",
    "from pyspark.sql.functions import desc\n",
    "import pyspark.sql.functions as func\n",
    "from graphframes import GraphFrame\n",
    "\n",
    "# Vertices: one row per distinct (name, lat, long); the station name becomes\n",
    "# the vertex key column \"id\".\n",
    "st_unique = sqlContext.sql(\"select distinct name,lat,long from station\")\n",
    "st_unique.registerTempTable(\"st_unique\")\n",
    "vertices = st_unique.withColumnRenamed(\"name\", \"id\").distinct()\n",
    "\n",
    "# Edges: one row per trip, with the start/end station names as \"src\"/\"dst\".\n",
    "trip = trip.withColumnRenamed(\"Start Station\", \"src\")\n",
    "trip = trip.withColumnRenamed(\"End Station\", \"dst\")\n",
    "\n",
    "# 'Bike #' is spelled exactly as in the CSV header so the select also resolves\n",
    "# when Spark analyses column names case-sensitively.\n",
    "tripEdges = trip.select('Trip ID', 'Duration', 'src', 'dst', 'Start Terminal', 'End Terminal', 'Start Date', 'End Date', 'Bike #')\n",
    "\n",
    "tripEdges.cache()\n",
    "vertices.cache()\n",
    "\n",
    "vertices.show(4)\n",
    "tripEdges.show(4)\n",
    "\n",
    "g = GraphFrame(vertices, tripEdges)\n",
    "# Function-call form prints the same text on Python 2 and is valid on Python 3.\n",
    "print(g)\n",
    "\n",
    "g.vertices.show()\n",
    "g.edges.show()\n",
    "\n",
    "# Trips arriving at / departing from each station.\n",
    "inDg = g.inDegrees\n",
    "inDg.show(10)\n",
    "\n",
    "out_Dg = g.outDegrees\n",
    "out_Dg.show(10)\n",
    "\n",
    "g.degrees.show()\n",
    "\n",
    "# Printed explicitly: a bare expression in the middle of a cell is evaluated\n",
    "# but never displayed.\n",
    "print(g.inDegrees.count())\n",
    "print(g.outDegrees.count())\n",
    "\n",
    "g.inDegrees.sort(desc(\"inDegree\")).show(10)\n",
    "g.outDegrees.sort(desc(\"outDegree\")).show(10)\n",
    "\n",
    "# One-edge motif: every directed edge e from station a to station b.\n",
    "motifs = g.find(\"(a)-[e]->(b)\")\n",
    "motifs.show()\n",
    "\n",
    "# Count the rows of the motif DataFrame. motifs.e is a Column, not a\n",
    "# DataFrame, so motifs.e.count() is not a row count and fails when called.\n",
    "print(motifs.count())\n",
    "\n",
    "# Number of trips per (start station, end station) pair, busiest first.\n",
    "topTrips = g.edges.groupBy(\"src\", \"dst\").agg(func.count(\"Trip ID\").alias(\"trips\"))\n",
    "topTrips.orderBy(topTrips.trips.desc()).limit(20).show()\n",
    "\n",
    "# PageRank algorithm\n",
    "# NOTE(review): maxIter=2 is a very small number of iterations - confirm it is intended.\n",
    "results = g.pageRank(resetProbability=0.15, maxIter=2)\n",
    "results.vertices.orderBy(results.vertices.pagerank.desc()).limit(20).show()\n",
    "\n",
    "# Two-hop motif: an edge a->b followed by an edge b->c.\n",
    "motifs = g.find(\"(a)-[ab]->(b); (b)-[bc]->(c)\")\n",
    "motifs.show()\n",
    "\n",
    "# In- and out-degree side by side per station.\n",
    "# NOTE(review): despite the name, no ratio is computed here - confirm whether\n",
    "# an inDegree/outDegree column was intended.\n",
    "dg_Ratio = inDg.join(out_Dg, inDg.id == out_Dg.id).drop(out_Dg.id)\n",
    "dg_Ratio.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Distinct (name, lat, long) rows from the \"station\" table, also registered\n",
    "# for SQL access under the table name \"st_unique\".\n",
    "st_unique = sqlContext.sql(\"select distinct name,lat,long from station\")\n",
    "st_unique.registerTempTable(name=\"st_unique\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Import only the names the notebook uses; a wildcard import of\n",
    "# pyspark.sql.functions would shadow Python builtins such as sum/min/max.\n",
    "from pyspark.sql.functions import desc\n",
    "from graphframes import GraphFrame\n",
    "\n",
    "# Vertices: the distinct stations, with the station name as the key column \"id\".\n",
    "vertices = st_unique.withColumnRenamed(\"name\", \"id\").distinct()\n",
    "\n",
    "# Edges: trips, with the start/end station names as \"src\"/\"dst\".\n",
    "trip = trip.withColumnRenamed(\"Start Station\", \"src\")\n",
    "trip = trip.withColumnRenamed(\"End Station\", \"dst\")\n",
    "\n",
    "# 'Bike #' is spelled exactly as in the CSV header so the select also resolves\n",
    "# when Spark analyses column names case-sensitively.\n",
    "tripEdges = trip.select('Trip ID', 'Duration', 'src', 'dst', 'Start Terminal', 'End Terminal', 'Start Date', 'End Date', 'Bike #')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+---------+-----------+\n",
      "|                  id|      lat|       long|\n",
      "+--------------------+---------+-----------+\n",
      "|       2nd at Folsom|37.785299|-122.396236|\n",
      "|   2nd at South Park|37.782259|-122.392738|\n",
      "|Rengstorff Avenue...|37.400241|-122.099076|\n",
      "|California Ave Ca...|37.429082|-122.142805|\n",
      "+--------------------+---------+-----------+\n",
      "only showing top 4 rows\n",
      "\n",
      "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n",
      "|Trip ID|Duration|                 src|                 dst|Start Terminal|End Terminal|     Start Date|       End Date|BIke #|\n",
      "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n",
      "| 913460|     765|Harry Bridges Pla...|San Francisco Cal...|            50|          70|8/31/2015 23:26|8/31/2015 23:39|   288|\n",
      "| 913459|    1036|San Antonio Shopp...|Mountain View Cit...|            31|          27|8/31/2015 23:11|8/31/2015 23:28|    35|\n",
      "| 913455|     307|      Post at Kearny|   2nd at South Park|            47|          64|8/31/2015 23:13|8/31/2015 23:18|   468|\n",
      "| 913454|     409|  San Jose City Hall| San Salvador at 1st|            10|           8|8/31/2015 23:10|8/31/2015 23:17|    68|\n",
      "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n",
      "only showing top 4 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Mark both graph inputs for caching, then preview four rows of each.\n",
    "for frame in (tripEdges, vertices):\n",
    "    frame.cache()\n",
    "for frame in (vertices, tripEdges):\n",
    "    frame.show(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GraphFrame(v:[id: string, lat: double, long: double], e:[src: string, dst: string, Trip ID: int, Duration: int, Start Terminal: int, End Terminal: int, Start Date: string, End Date: string, BIke #: int])\n",
      "+--------------------+---------+-----------+\n",
      "|                  id|      lat|       long|\n",
      "+--------------------+---------+-----------+\n",
      "|       2nd at Folsom|37.785299|-122.396236|\n",
      "|   2nd at South Park|37.782259|-122.392738|\n",
      "|Rengstorff Avenue...|37.400241|-122.099076|\n",
      "|California Ave Ca...|37.429082|-122.142805|\n",
      "|Cowper at University|37.448598|-122.159504|\n",
      "|Harry Bridges Pla...|37.795392|-122.394203|\n",
      "|         Ryland Park|37.342725|-121.895617|\n",
      "|  San Jose City Hall|37.337391|-121.886995|\n",
      "|Embarcadero at Br...|37.787152|-122.388013|\n",
      "|       5th at Howard|37.781752|-122.405127|\n",
      "|SJSU - San Salvad...|37.333955|-121.877349|\n",
      "|Commercial at Mon...|37.794231|-122.402923|\n",
      "|Evelyn Park and Ride|37.390277|-122.066553|\n",
      "|     Clay at Battery|37.795001| -122.39997|\n",
      "|San Antonio Caltr...| 37.40694|-122.106758|\n",
      "|       Market at 4th|37.786305|-122.404966|\n",
      "|SJSU 4th at San C...|37.332808|-121.883891|\n",
      "|Santa Clara Count...|37.352601|-121.905733|\n",
      "|San Francisco Cit...| 37.77865|-122.418235|\n",
      "|Mountain View Cit...|37.389218|-122.081896|\n",
      "+--------------------+---------+-----------+\n",
      "only showing top 20 rows\n",
      "\n",
      "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n",
      "|Trip ID|Duration|                 src|                 dst|Start Terminal|End Terminal|     Start Date|       End Date|BIke #|\n",
      "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n",
      "| 913460|     765|Harry Bridges Pla...|San Francisco Cal...|            50|          70|8/31/2015 23:26|8/31/2015 23:39|   288|\n",
      "| 913459|    1036|San Antonio Shopp...|Mountain View Cit...|            31|          27|8/31/2015 23:11|8/31/2015 23:28|    35|\n",
      "| 913455|     307|      Post at Kearny|   2nd at South Park|            47|          64|8/31/2015 23:13|8/31/2015 23:18|   468|\n",
      "| 913454|     409|  San Jose City Hall| San Salvador at 1st|            10|           8|8/31/2015 23:10|8/31/2015 23:17|    68|\n",
      "| 913453|     789|Embarcadero at Fo...|Embarcadero at Sa...|            51|          60|8/31/2015 23:09|8/31/2015 23:22|   487|\n",
      "| 913452|     293|Yerba Buena Cente...|San Francisco Cal...|            68|          70|8/31/2015 23:07|8/31/2015 23:12|   538|\n",
      "| 913451|     896|Embarcadero at Fo...|Embarcadero at Sa...|            51|          60|8/31/2015 23:07|8/31/2015 23:22|   363|\n",
      "| 913450|     255|Embarcadero at Sa...|   Steuart at Market|            60|          74|8/31/2015 22:16|8/31/2015 22:20|   470|\n",
      "| 913449|     126|     Beale at Market|Temporary Transba...|            56|          55|8/31/2015 22:12|8/31/2015 22:15|   439|\n",
      "| 913448|     932|      Post at Kearny|South Van Ness at...|            47|          66|8/31/2015 21:57|8/31/2015 22:12|   472|\n",
      "| 913443|     691|Embarcadero at Sa...|   Market at Sansome|            60|          77|8/31/2015 21:49|8/31/2015 22:01|   434|\n",
      "| 913442|     633|      Market at 10th|San Francisco Cal...|            67|          70|8/31/2015 21:44|8/31/2015 21:54|   531|\n",
      "| 913441|     387|       Market at 4th|Grant Avenue at C...|            76|          73|8/31/2015 21:39|8/31/2015 21:46|   383|\n",
      "| 913440|     281|   Market at Sansome|Broadway St at Ba...|            77|          82|8/31/2015 21:31|8/31/2015 21:36|   621|\n",
      "| 913435|     424|Temporary Transba...|San Francisco Cal...|            55|          69|8/31/2015 21:25|8/31/2015 21:33|   602|\n",
      "| 913434|     283|San Francisco Cal...|     Townsend at 7th|            69|          65|8/31/2015 21:19|8/31/2015 21:24|   521|\n",
      "| 913433|     145|University and Em...|Cowper at University|            35|          37|8/31/2015 21:17|8/31/2015 21:20|    75|\n",
      "| 913432|     703|     Spear at Folsom|San Francisco Cal...|            49|          69|8/31/2015 21:16|8/31/2015 21:28|   426|\n",
      "| 913431|     605|Temporary Transba...|Grant Avenue at C...|            55|          73|8/31/2015 21:11|8/31/2015 21:21|   572|\n",
      "| 913429|     902|San Francisco Cal...|Broadway St at Ba...|            70|          82|8/31/2015 21:07|8/31/2015 21:22|   501|\n",
      "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Build the graph: stations are vertices, trips are directed edges.\n",
    "g = GraphFrame(vertices, tripEdges)\n",
    "# Function-call form prints the same text on Python 2 and is valid on Python 3.\n",
    "print(g)\n",
    "\n",
    "# Default show() previews the first 20 rows of each side of the graph.\n",
    "g.vertices.show()\n",
    "\n",
    "g.edges.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------+\n",
      "|                  id|inDegree|\n",
      "+--------------------+--------+\n",
      "|Embarcadero at Br...|    6687|\n",
      "|   Market at Sansome|   13916|\n",
      "|       Park at Olive|     417|\n",
      "|Washington at Kearny|    3481|\n",
      "|Evelyn Park and Ride|     725|\n",
      "|    San Pedro Square|    1595|\n",
      "|San Antonio Caltr...|    1046|\n",
      "|     2nd at Townsend|   15463|\n",
      "| San Salvador at 1st|     547|\n",
      "|         MLK Library|     960|\n",
      "+--------------------+--------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Incoming-edge count per station; keep the DataFrame for later use and\n",
    "# preview ten rows.\n",
    "inDg = g.inDegrees\n",
    "inDg.show(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+---------+\n",
      "|                  id|outDegree|\n",
      "+--------------------+---------+\n",
      "|Embarcadero at Br...|     7483|\n",
      "|   Market at Sansome|    11431|\n",
      "|       Park at Olive|      376|\n",
      "|Washington at Kearny|     2660|\n",
      "|Evelyn Park and Ride|      978|\n",
      "|    San Pedro Square|     1418|\n",
      "|San Antonio Caltr...|     1058|\n",
      "|     2nd at Townsend|    14026|\n",
      "|         MLK Library|     1099|\n",
      "| San Salvador at 1st|      495|\n",
      "+--------------------+---------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Outgoing-edge count per station; keep the DataFrame for later use and\n",
    "# preview ten rows.\n",
    "out_Dg = g.outDegrees\n",
    "out_Dg.show(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Total degree (in + out) per station.\n",
    "g.degrees.show()\n",
    "\n",
    "# Print both counts: only the last bare expression of a cell is displayed, so\n",
    "# the in-degree count would otherwise be computed and then discarded.\n",
    "print(g.inDegrees.count())\n",
    "print(g.outDegrees.count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------+\n",
      "|                  id|inDegree|\n",
      "+--------------------+--------+\n",
      "|San Francisco Cal...|   34810|\n",
      "|San Francisco Cal...|   22523|\n",
      "|Harry Bridges Pla...|   17810|\n",
      "|     2nd at Townsend|   15463|\n",
      "|     Townsend at 7th|   15422|\n",
      "|Embarcadero at Sa...|   15065|\n",
      "|   Market at Sansome|   13916|\n",
      "|   Steuart at Market|   13617|\n",
      "|Temporary Transba...|   12966|\n",
      "|  Powell Street BART|   10239|\n",
      "+--------------------+--------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Ten stations with the highest in-degree.\n",
    "in_degree_ranking = g.inDegrees.sort(desc(\"inDegree\"))\n",
    "in_degree_ranking.show(10)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+---------+\n",
      "|                  id|outDegree|\n",
      "+--------------------+---------+\n",
      "|San Francisco Cal...|    26304|\n",
      "|San Francisco Cal...|    21758|\n",
      "|Harry Bridges Pla...|    17255|\n",
      "|Temporary Transba...|    14436|\n",
      "|Embarcadero at Sa...|    14158|\n",
      "|     2nd at Townsend|    14026|\n",
      "|     Townsend at 7th|    13752|\n",
      "|   Steuart at Market|    13687|\n",
      "|      Market at 10th|    11885|\n",
      "|   Market at Sansome|    11431|\n",
      "+--------------------+---------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Ten stations with the highest out-degree.\n",
    "out_degree_ranking = g.outDegrees.sort(desc(\"outDegree\"))\n",
    "out_degree_ranking.show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------------------+--------------------+\n",
      "|                   e|                   a|                   b|\n",
      "+--------------------+--------------------+--------------------+\n",
      "|[913460,765,Harry...|[Harry Bridges Pl...|[San Francisco Ca...|\n",
      "|[913459,1036,San ...|[San Antonio Shop...|[Mountain View Ci...|\n",
      "|[913454,409,San J...|[San Jose City Ha...|[San Salvador at ...|\n",
      "|[913453,789,Embar...|[Embarcadero at F...|[Embarcadero at S...|\n",
      "|[913452,293,Yerba...|[Yerba Buena Cent...|[San Francisco Ca...|\n",
      "|[913451,896,Embar...|[Embarcadero at F...|[Embarcadero at S...|\n",
      "|[913450,255,Embar...|[Embarcadero at S...|[Steuart at Marke...|\n",
      "|[913449,126,Beale...|[Beale at Market,...|[Temporary Transb...|\n",
      "|[913443,691,Embar...|[Embarcadero at S...|[Market at Sansom...|\n",
      "|[913442,633,Marke...|[Market at 10th,3...|[San Francisco Ca...|\n",
      "+--------------------+--------------------+--------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "339030"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-edge motif: every directed edge e from station a to station b.\n",
    "edge_pattern = \"(a)-[e]->(b)\"\n",
    "motifs = g.find(edge_pattern)\n",
    "motifs.show(10)\n",
    "\n",
    "# Last expression of the cell, so the number of matches is displayed.\n",
    "motifs.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pyspark.sql.functions as func\n",
    "\n",
    "# Number of trips for each (start station, end station) pair.\n",
    "trip_count = func.count(\"Trip ID\").alias(\"trips\")\n",
    "topTrips = g.edges.groupBy(\"src\", \"dst\").agg(trip_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Twenty routes with the most trips, busiest first.\n",
    "ranked_trips = topTrips.orderBy(topTrips.trips.desc())\n",
    "ranked_trips.limit(20).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------------------+--------------------+--------------------+--------------------+\n",
      "|                  ab|                   a|                   b|                  bc|                   c|\n",
      "+--------------------+--------------------+--------------------+--------------------+--------------------+\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[913386,1808,Emba...|[Harry Bridges Pl...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[913204,453,Embar...|[San Francisco Ca...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912934,366,Embar...|[San Francisco Ca...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912823,284,Embar...|[Harry Bridges Pl...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912653,317,Embar...|[San Francisco Ca...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912472,642,Embar...|[Embarcadero at S...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912419,629,Embar...|[Embarcadero at S...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912417,768,Embar...|[Embarcadero at S...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912416,788,Embar...|[Embarcadero at S...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912401,670,Embar...|[2nd at Folsom,37...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912365,570,Embar...|[Clay at Battery,...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912359,178,Embar...|[Spear at Folsom,...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912222,291,Embar...|[Steuart at Marke...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911923,299,Embar...|[Embarcadero at F...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911922,578,Embar...|[Yerba Buena Cent...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911915,594,Embar...|[Townsend at 7th,...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911911,592,Embar...|[Commercial at Mo...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911883,632,Embar...|[Embarcadero at V...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911820,394,Embar...|[San Francisco Ca...|\n",
      "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911734,536,Embar...|[San Francisco Ca...|\n",
      "+--------------------+--------------------+--------------------+--------------------+--------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Two-hop motif: an edge ab from a to b followed by an edge bc from b to c.\n",
    "# Note that this rebinds the name motifs.\n",
    "two_hop_pattern = \"(a)-[ab]->(b); (b)-[bc]->(c)\"\n",
    "motifs = g.find(two_hop_pattern)\n",
    "motifs.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+---------+-----------+-------------------+\n",
      "|                  id|      lat|       long|           pagerank|\n",
      "+--------------------+---------+-----------+-------------------+\n",
      "|San Jose Diridon ...|37.329732|-121.901782|  1.214473817911346|\n",
      "|San Francisco Cal...|37.776617| -122.39526| 1.0496950452048421|\n",
      "|Redwood City Calt...|37.486078|-122.232089| 0.8006700627240692|\n",
      "|Mountain View Cal...|37.394358|-122.076713|  0.753839953327208|\n",
      "|San Francisco Cal...|  37.7766| -122.39547|  0.726896036435997|\n",
      "|Harry Bridges Pla...|37.795392|-122.394203| 0.5768070317865084|\n",
      "|     Townsend at 7th|37.771058|-122.402717| 0.5223761109233136|\n",
      "|     2nd at Townsend|37.780526|-122.390288| 0.5196239311404478|\n",
      "|Embarcadero at Sa...| 37.80477|-122.403234| 0.5166722126882114|\n",
      "|Palo Alto Caltrai...|37.443988|-122.164759| 0.5102959418842666|\n",
      "|   Market at Sansome|37.789625|-122.400811| 0.4985964480246575|\n",
      "|Santa Clara at Al...|37.333988|-121.894902| 0.4955425352860525|\n",
      "|   Steuart at Market|37.794139|-122.394434| 0.4924920182117113|\n",
      "|    San Pedro Square|37.336721|-121.894074| 0.4836753860383761|\n",
      "|Temporary Transba...|37.789756|-122.394643| 0.4821801498047339|\n",
      "|University and Em...|37.444521|-122.163093| 0.4633005230751974|\n",
      "|Stanford in Redwo...| 37.48537|-122.203288|0.41781405187031906|\n",
      "|San Antonio Shopp...|37.400443|-122.108338|0.40911028122622684|\n",
      "|      Market at 10th|37.776619|-122.417385|0.40629587315402715|\n",
      "|San Antonio Caltr...| 37.40694|-122.106758| 0.4049714017690149|\n",
      "+--------------------+---------+-----------+-------------------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# PageRank over the trip graph with reset probability 0.15, capped at two iterations.\n",
    "# NOTE(review): two iterations is very few - confirm this cap is intended.\n",
    "results = g.pageRank(maxIter=2, resetProbability=0.15)\n",
    "\n",
    "# Twenty stations with the highest PageRank score.\n",
    "ranked_vertices = results.vertices\n",
    "ranked_vertices.orderBy(ranked_vertices.pagerank.desc()).limit(20).show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}