{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from graphframes import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "trip = sqlContext.read.format(\"com.databricks.spark.csv\").options(header='true', inferschema='true').load(\"file:///home/cloudera/trip.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "station = sqlContext.read.format(\"com.databricks.spark.csv\").options(header='true', inferschema='true').load(\"file:///home/cloudera/station.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "DataFrame[Trip ID: int, Duration: int, Start Date: string, Start Station: string, Start Terminal: int, End Date: string, End Station: string, End Terminal: int, Bike #: int, Subscriber Type: string, Zip Code: string]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trip.registerTempTable(\"trip\")\n", "trip.cache()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "DataFrame[station_id: int, name: string, lat: double, long: double, dockcount: int, landmark: string, installation: string]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "station.registerTempTable(\"station\")\n", "station.cache()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+----------+--------------------+---------+-----------+---------+--------+------------+\n", "|station_id| name| lat| long|dockcount|landmark|installation|\n", "+----------+--------------------+---------+-----------+---------+--------+------------+\n", "| 2|San Jose Diridon 
...|37.329732|-121.901782| 27|San Jose| 8/6/2013|\n", "| 3|San Jose Civic Ce...|37.330698|-121.888979| 15|San Jose| 8/5/2013|\n", "| 4|Santa Clara at Al...|37.333988|-121.894902| 11|San Jose| 8/6/2013|\n", "| 5| Adobe on Almaden|37.331415| -121.8932| 19|San Jose| 8/5/2013|\n", "+----------+--------------------+---------+-----------+---------+--------+------------+\n", "only showing top 4 rows\n", "\n" ] } ], "source": [ "sqlContext.sql(\"SELECT * from station\").show(4)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n", "|Trip ID|Duration| Start Date| Start Station|Start Terminal| End Date| End Station|End Terminal|Bike #|Subscriber Type|Zip Code|\n", "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n", "| 913460| 765|8/31/2015 23:26|Harry Bridges Pla...| 50|8/31/2015 23:39|San Francisco Cal...| 70| 288| Subscriber| 2139|\n", "| 913459| 1036|8/31/2015 23:11|San Antonio Shopp...| 31|8/31/2015 23:28|Mountain View Cit...| 27| 35| Subscriber| 95032|\n", "| 913455| 307|8/31/2015 23:13| Post at Kearny| 47|8/31/2015 23:18| 2nd at South Park| 64| 468| Subscriber| 94107|\n", "| 913454| 409|8/31/2015 23:10| San Jose City Hall| 10|8/31/2015 23:17| San Salvador at 1st| 8| 68| Subscriber| 95113|\n", "+-------+--------+---------------+--------------------+--------------+---------------+--------------------+------------+------+---------------+--------+\n", "only showing top 4 rows\n", "\n" ] } ], "source": [ "sqlContext.sql(\"SELECT * from trip\").show(4)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "st_unique=sqlContext.sql(\"select distinct 
name,lat,long from station\")\n", "st_unique.registerTempTable(\"st_unique\")\n", "\n", "from pyspark.sql.functions import *\n", "from graphframes import *\n", "\n", "# Vertices: one row per distinct (name, lat, long) station, with the station name exposed as `id`.\n", "vertices = st_unique.withColumnRenamed(\"name\", \"id\").distinct()\n", "\n", "# Edges: expose the start/end station names as the `src`/`dst` columns of the edge DataFrame.\n", "trip = trip.withColumnRenamed(\"Start Station\",\"src\")\n", "\n", "trip = trip.withColumnRenamed(\"End Station\",\"dst\")\n", "\n", "tripEdges = trip.select('Trip ID', 'Duration','src', 'dst','Start Terminal','End Terminal','Start Date','End Date','BIke #')\n", "\n", "tripEdges.cache()\n", "vertices.cache()\n", "\n", "vertices.show(4)\n", "\n", "tripEdges.show(4)\n", "\n", "\n", "g = GraphFrame(vertices,tripEdges)\n", "print g\n", "\n", "g.vertices.show()\n", "\n", "g.edges.show()\n", "\n", "inDg = g.inDegrees\n", "inDg.show(10)\n", "\n", "out_Dg =g.outDegrees\n", "out_Dg.show(10)\n", "\n", "g.degrees.show()\n", "\n", "# Print the counts: a bare expression in the middle of a cell is evaluated but never displayed.\n", "print g.inDegrees.count()\n", "\n", "print g.outDegrees.count()\n", "\n", "g.inDegrees.sort(desc(\"inDegree\")).show(10)\n", "\n", "g.outDegrees.sort(desc(\"outDegree\")).show(10)\n", "\n", "motifs = g.find(\"(a)-[e]->(b)\")\n", "motifs.show()\n", "\n", "# Count rows on the DataFrame itself; `motifs.e` is a Column and calling .count() on it raises.\n", "print motifs.count()\n", "\n", "\n", "import pyspark.sql.functions as func\n", "# Number of trips for every (src, dst) station pair.\n", "topTrips = g.edges.groupBy(\"src\", \"dst\").agg(func.count(\"Trip ID\").alias(\"trips\"))\n", "\n", "\n", "topTrips.orderBy(topTrips.trips.desc()).limit(20).show()\n", "\n", "\n", "#PageRank algorithm\n", "results = g.pageRank(resetProbability=0.15, maxIter = 2)\n", "\n", "results.vertices.orderBy(results.vertices.pagerank.desc()).limit(20).show()\n", "\n", "# Two-hop motif: an edge a->b followed by an edge b->c.\n", "motifs = g.find(\"(a)-[ab]->(b); (b)-[bc]->(c)\")\n", "motifs.show()\n", "\n", "###outdegrees/indegrees\n", "# Joins in-degree and out-degree per station id; no ratio column is computed here.\n", "dg_Ratio = inDg.join(out_Dg,inDg.id == out_Dg.id).drop(out_Dg.id)\n", "dg_Ratio.show()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "st_unique=sqlContext.sql(\"select distinct name,lat,long from station\")\n", "st_unique.registerTempTable(\"st_unique\")" ] }, { 
"cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from pyspark.sql.functions import *\n", "from graphframes import *\n", "\n", "vertices = st_unique.withColumnRenamed(\"name\", \"id\").distinct()\n", "\n", "trip = trip.withColumnRenamed(\"Start Station\",\"src\")\n", "\n", "trip = trip.withColumnRenamed(\"End Station\",\"dst\")\n", "\n", "tripEdges = trip.select('Trip ID', 'Duration','src', 'dst','Start Terminal','End Terminal','Start Date','End Date','BIke #')\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+---------+-----------+\n", "| id| lat| long|\n", "+--------------------+---------+-----------+\n", "| 2nd at Folsom|37.785299|-122.396236|\n", "| 2nd at South Park|37.782259|-122.392738|\n", "|Rengstorff Avenue...|37.400241|-122.099076|\n", "|California Ave Ca...|37.429082|-122.142805|\n", "+--------------------+---------+-----------+\n", "only showing top 4 rows\n", "\n", "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n", "|Trip ID|Duration| src| dst|Start Terminal|End Terminal| Start Date| End Date|BIke #|\n", "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n", "| 913460| 765|Harry Bridges Pla...|San Francisco Cal...| 50| 70|8/31/2015 23:26|8/31/2015 23:39| 288|\n", "| 913459| 1036|San Antonio Shopp...|Mountain View Cit...| 31| 27|8/31/2015 23:11|8/31/2015 23:28| 35|\n", "| 913455| 307| Post at Kearny| 2nd at South Park| 47| 64|8/31/2015 23:13|8/31/2015 23:18| 468|\n", "| 913454| 409| San Jose City Hall| San Salvador at 1st| 10| 8|8/31/2015 23:10|8/31/2015 23:17| 68|\n", "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n", "only showing 
top 4 rows\n", "\n" ] } ], "source": [ "tripEdges.cache()\n", "vertices.cache()\n", "vertices.show(4)\n", "tripEdges.show(4)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GraphFrame(v:[id: string, lat: double, long: double], e:[src: string, dst: string, Trip ID: int, Duration: int, Start Terminal: int, End Terminal: int, Start Date: string, End Date: string, BIke #: int])\n", "+--------------------+---------+-----------+\n", "| id| lat| long|\n", "+--------------------+---------+-----------+\n", "| 2nd at Folsom|37.785299|-122.396236|\n", "| 2nd at South Park|37.782259|-122.392738|\n", "|Rengstorff Avenue...|37.400241|-122.099076|\n", "|California Ave Ca...|37.429082|-122.142805|\n", "|Cowper at University|37.448598|-122.159504|\n", "|Harry Bridges Pla...|37.795392|-122.394203|\n", "| Ryland Park|37.342725|-121.895617|\n", "| San Jose City Hall|37.337391|-121.886995|\n", "|Embarcadero at Br...|37.787152|-122.388013|\n", "| 5th at Howard|37.781752|-122.405127|\n", "|SJSU - San Salvad...|37.333955|-121.877349|\n", "|Commercial at Mon...|37.794231|-122.402923|\n", "|Evelyn Park and Ride|37.390277|-122.066553|\n", "| Clay at Battery|37.795001| -122.39997|\n", "|San Antonio Caltr...| 37.40694|-122.106758|\n", "| Market at 4th|37.786305|-122.404966|\n", "|SJSU 4th at San C...|37.332808|-121.883891|\n", "|Santa Clara Count...|37.352601|-121.905733|\n", "|San Francisco Cit...| 37.77865|-122.418235|\n", "|Mountain View Cit...|37.389218|-122.081896|\n", "+--------------------+---------+-----------+\n", "only showing top 20 rows\n", "\n", "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n", "|Trip ID|Duration| src| dst|Start Terminal|End Terminal| Start Date| End Date|BIke #|\n", 
"+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n", "| 913460| 765|Harry Bridges Pla...|San Francisco Cal...| 50| 70|8/31/2015 23:26|8/31/2015 23:39| 288|\n", "| 913459| 1036|San Antonio Shopp...|Mountain View Cit...| 31| 27|8/31/2015 23:11|8/31/2015 23:28| 35|\n", "| 913455| 307| Post at Kearny| 2nd at South Park| 47| 64|8/31/2015 23:13|8/31/2015 23:18| 468|\n", "| 913454| 409| San Jose City Hall| San Salvador at 1st| 10| 8|8/31/2015 23:10|8/31/2015 23:17| 68|\n", "| 913453| 789|Embarcadero at Fo...|Embarcadero at Sa...| 51| 60|8/31/2015 23:09|8/31/2015 23:22| 487|\n", "| 913452| 293|Yerba Buena Cente...|San Francisco Cal...| 68| 70|8/31/2015 23:07|8/31/2015 23:12| 538|\n", "| 913451| 896|Embarcadero at Fo...|Embarcadero at Sa...| 51| 60|8/31/2015 23:07|8/31/2015 23:22| 363|\n", "| 913450| 255|Embarcadero at Sa...| Steuart at Market| 60| 74|8/31/2015 22:16|8/31/2015 22:20| 470|\n", "| 913449| 126| Beale at Market|Temporary Transba...| 56| 55|8/31/2015 22:12|8/31/2015 22:15| 439|\n", "| 913448| 932| Post at Kearny|South Van Ness at...| 47| 66|8/31/2015 21:57|8/31/2015 22:12| 472|\n", "| 913443| 691|Embarcadero at Sa...| Market at Sansome| 60| 77|8/31/2015 21:49|8/31/2015 22:01| 434|\n", "| 913442| 633| Market at 10th|San Francisco Cal...| 67| 70|8/31/2015 21:44|8/31/2015 21:54| 531|\n", "| 913441| 387| Market at 4th|Grant Avenue at C...| 76| 73|8/31/2015 21:39|8/31/2015 21:46| 383|\n", "| 913440| 281| Market at Sansome|Broadway St at Ba...| 77| 82|8/31/2015 21:31|8/31/2015 21:36| 621|\n", "| 913435| 424|Temporary Transba...|San Francisco Cal...| 55| 69|8/31/2015 21:25|8/31/2015 21:33| 602|\n", "| 913434| 283|San Francisco Cal...| Townsend at 7th| 69| 65|8/31/2015 21:19|8/31/2015 21:24| 521|\n", "| 913433| 145|University and Em...|Cowper at University| 35| 37|8/31/2015 21:17|8/31/2015 21:20| 75|\n", "| 913432| 703| Spear at Folsom|San Francisco Cal...| 49| 69|8/31/2015 21:16|8/31/2015 
21:28| 426|\n", "| 913431| 605|Temporary Transba...|Grant Avenue at C...| 55| 73|8/31/2015 21:11|8/31/2015 21:21| 572|\n", "| 913429| 902|San Francisco Cal...|Broadway St at Ba...| 70| 82|8/31/2015 21:07|8/31/2015 21:22| 501|\n", "+-------+--------+--------------------+--------------------+--------------+------------+---------------+---------------+------+\n", "only showing top 20 rows\n", "\n" ] } ], "source": [ "g = GraphFrame(vertices,tripEdges)\n", "print g\n", "\n", "g.vertices.show()\n", "\n", "g.edges.show()\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+--------+\n", "| id|inDegree|\n", "+--------------------+--------+\n", "|Embarcadero at Br...| 6687|\n", "| Market at Sansome| 13916|\n", "| Park at Olive| 417|\n", "|Washington at Kearny| 3481|\n", "|Evelyn Park and Ride| 725|\n", "| San Pedro Square| 1595|\n", "|San Antonio Caltr...| 1046|\n", "| 2nd at Townsend| 15463|\n", "| San Salvador at 1st| 547|\n", "| MLK Library| 960|\n", "+--------------------+--------+\n", "only showing top 10 rows\n", "\n" ] } ], "source": [ "inDg = g.inDegrees\n", "inDg.show(10)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+---------+\n", "| id|outDegree|\n", "+--------------------+---------+\n", "|Embarcadero at Br...| 7483|\n", "| Market at Sansome| 11431|\n", "| Park at Olive| 376|\n", "|Washington at Kearny| 2660|\n", "|Evelyn Park and Ride| 978|\n", "| San Pedro Square| 1418|\n", "|San Antonio Caltr...| 1058|\n", "| 2nd at Townsend| 14026|\n", "| MLK Library| 1099|\n", "| San Salvador at 1st| 495|\n", "+--------------------+---------+\n", "only showing top 10 rows\n", "\n" ] } ], "source": [ "out_Dg =g.outDegrees\n", "out_Dg.show(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { 
"collapsed": true }, "outputs": [], "source": [ "g.degrees.show()\n", "\n", "g.inDegrees.count()\n", "\n", "g.outDegrees.count()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+--------+\n", "| id|inDegree|\n", "+--------------------+--------+\n", "|San Francisco Cal...| 34810|\n", "|San Francisco Cal...| 22523|\n", "|Harry Bridges Pla...| 17810|\n", "| 2nd at Townsend| 15463|\n", "| Townsend at 7th| 15422|\n", "|Embarcadero at Sa...| 15065|\n", "| Market at Sansome| 13916|\n", "| Steuart at Market| 13617|\n", "|Temporary Transba...| 12966|\n", "| Powell Street BART| 10239|\n", "+--------------------+--------+\n", "only showing top 10 rows\n", "\n" ] } ], "source": [ "g.inDegrees.sort(desc(\"inDegree\")).show(10)\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+---------+\n", "| id|outDegree|\n", "+--------------------+---------+\n", "|San Francisco Cal...| 26304|\n", "|San Francisco Cal...| 21758|\n", "|Harry Bridges Pla...| 17255|\n", "|Temporary Transba...| 14436|\n", "|Embarcadero at Sa...| 14158|\n", "| 2nd at Townsend| 14026|\n", "| Townsend at 7th| 13752|\n", "| Steuart at Market| 13687|\n", "| Market at 10th| 11885|\n", "| Market at Sansome| 11431|\n", "+--------------------+---------+\n", "only showing top 10 rows\n", "\n" ] } ], "source": [ "g.outDegrees.sort(desc(\"outDegree\")).show(10)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+--------------------+--------------------+\n", "| e| a| b|\n", "+--------------------+--------------------+--------------------+\n", "|[913460,765,Harry...|[Harry Bridges Pl...|[San Francisco Ca...|\n", "|[913459,1036,San ...|[San 
Antonio Shop...|[Mountain View Ci...|\n", "|[913454,409,San J...|[San Jose City Ha...|[San Salvador at ...|\n", "|[913453,789,Embar...|[Embarcadero at F...|[Embarcadero at S...|\n", "|[913452,293,Yerba...|[Yerba Buena Cent...|[San Francisco Ca...|\n", "|[913451,896,Embar...|[Embarcadero at F...|[Embarcadero at S...|\n", "|[913450,255,Embar...|[Embarcadero at S...|[Steuart at Marke...|\n", "|[913449,126,Beale...|[Beale at Market,...|[Temporary Transb...|\n", "|[913443,691,Embar...|[Embarcadero at S...|[Market at Sansom...|\n", "|[913442,633,Marke...|[Market at 10th,3...|[San Francisco Ca...|\n", "+--------------------+--------------------+--------------------+\n", "only showing top 10 rows\n", "\n" ] }, { "data": { "text/plain": [ "339030" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "motifs = g.find(\"(a)-[e]->(b)\")\n", "motifs.show(10)\n", "\n", "motifs.count()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pyspark.sql.functions as func\n", "topTrips = g.edges.groupBy(\"src\", \"dst\").agg(func.count(\"Trip ID\").alias(\"trips\"))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "topTrips.orderBy(topTrips.trips.desc()).limit(20).show()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+--------------------+--------------------+--------------------+--------------------+\n", "| ab| a| b| bc| c|\n", "+--------------------+--------------------+--------------------+--------------------+--------------------+\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[913386,1808,Emba...|[Harry Bridges Pl...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[913204,453,Embar...|[San Francisco Ca...|\n", 
"|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912934,366,Embar...|[San Francisco Ca...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912823,284,Embar...|[Harry Bridges Pl...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912653,317,Embar...|[San Francisco Ca...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912472,642,Embar...|[Embarcadero at S...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912419,629,Embar...|[Embarcadero at S...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912417,768,Embar...|[Embarcadero at S...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912416,788,Embar...|[Embarcadero at S...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912401,670,Embar...|[2nd at Folsom,37...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912365,570,Embar...|[Clay at Battery,...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912359,178,Embar...|[Spear at Folsom,...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[912222,291,Embar...|[Steuart at Marke...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911923,299,Embar...|[Embarcadero at F...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911922,578,Embar...|[Yerba Buena Cent...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911915,594,Embar...|[Townsend at 7th,...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911911,592,Embar...|[Commercial at Mo...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911883,632,Embar...|[Embarcadero at V...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911820,394,Embar...|[San Francisco Ca...|\n", "|[913415,274,Harry...|[Harry Bridges Pl...|[Embarcadero at B...|[911734,536,Embar...|[San 
Francisco Ca...|\n", "+--------------------+--------------------+--------------------+--------------------+--------------------+\n", "only showing top 20 rows\n", "\n" ] } ], "source": [ "\n", "motifs = g.find(\"(a)-[ab]->(b); (b)-[bc]->(c)\")\n", "motifs.show()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------------------+---------+-----------+-------------------+\n", "| id| lat| long| pagerank|\n", "+--------------------+---------+-----------+-------------------+\n", "|San Jose Diridon ...|37.329732|-121.901782| 1.214473817911346|\n", "|San Francisco Cal...|37.776617| -122.39526| 1.0496950452048421|\n", "|Redwood City Calt...|37.486078|-122.232089| 0.8006700627240692|\n", "|Mountain View Cal...|37.394358|-122.076713| 0.753839953327208|\n", "|San Francisco Cal...| 37.7766| -122.39547| 0.726896036435997|\n", "|Harry Bridges Pla...|37.795392|-122.394203| 0.5768070317865084|\n", "| Townsend at 7th|37.771058|-122.402717| 0.5223761109233136|\n", "| 2nd at Townsend|37.780526|-122.390288| 0.5196239311404478|\n", "|Embarcadero at Sa...| 37.80477|-122.403234| 0.5166722126882114|\n", "|Palo Alto Caltrai...|37.443988|-122.164759| 0.5102959418842666|\n", "| Market at Sansome|37.789625|-122.400811| 0.4985964480246575|\n", "|Santa Clara at Al...|37.333988|-121.894902| 0.4955425352860525|\n", "| Steuart at Market|37.794139|-122.394434| 0.4924920182117113|\n", "| San Pedro Square|37.336721|-121.894074| 0.4836753860383761|\n", "|Temporary Transba...|37.789756|-122.394643| 0.4821801498047339|\n", "|University and Em...|37.444521|-122.163093| 0.4633005230751974|\n", "|Stanford in Redwo...| 37.48537|-122.203288|0.41781405187031906|\n", "|San Antonio Shopp...|37.400443|-122.108338|0.40911028122622684|\n", "| Market at 10th|37.776619|-122.417385|0.40629587315402715|\n", "|San Antonio Caltr...| 37.40694|-122.106758| 0.4049714017690149|\n", 
"+--------------------+---------+-----------+-------------------+\n", "\n" ] } ], "source": [ "#PageRank algorithm\n", "results = g.pageRank(resetProbability=0.15, maxIter = 2)\n", "\n", "results.vertices.orderBy(results.vertices.pagerank.desc()).limit(20).show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python [Root]", "language": "python", "name": "Python [Root]" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 0 }