From 545d71fc54d409854c70ddbd65631df90177e9c8 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 09:46:50 +0530
Subject: [PATCH 01/17] app.py

---
 visualize/app.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/visualize/app.py b/visualize/app.py
index 01590b7..5340331 100644
--- a/visualize/app.py
+++ b/visualize/app.py
@@ -11,13 +11,13 @@
 # Load environment variables from .env file
 load_dotenv()
 
-cosmos_database = os.getenv("COSMOS_DATABASE")
-cosmos_graph_collection = os.getenv("COSMOS_GRAPH_COLLECTION")
-cosmos_key = os.getenv("COSMOS_KEY")
-cosmos_endpoint = os.getenv("COSMOS_ENDPOINT")
-search_key = os.getenv("SEARCH_KEY")
-search_index = os.getenv("SEARCH_INDEX")
-search_endpoint = os.getenv("SEARCH_ENDPOINT")
+cosmos_database = os.getenv("COSMOS_DATABASE", "ontology_nn")
+cosmos_graph_collection = os.getenv("COSMOS_GRAPH_COLLECTION", "graphnn7")
+cosmos_key = os.getenv("COSMOS_KEY", "RcyO8fytL4FX7s8Lo9ZejRhvLwXLjN0Kp9GCUHXKTeyBLBuwrAPoAfDDBLPuoEh0jrqBMtBXbCw4ACDblJYPqg==")
+cosmos_endpoint = os.getenv("COSMOS_ENDPOINT", "wss://ontologypoc.gremlin.cosmos.azure.com:443/")
+# search_key = os.getenv("SEARCH_KEY")
+# search_index = os.getenv("SEARCH_INDEX")
+# search_endpoint = os.getenv("SEARCH_ENDPOINT")
 
 
 class Transaction:

From 6375c9ea4396956b7ab69cfd3e80411f775f3d74 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 09:48:34 +0530
Subject: [PATCH 02/17] app.py

---
 visualize/app.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/visualize/app.py b/visualize/app.py
index 5340331..03743ac 100644
--- a/visualize/app.py
+++ b/visualize/app.py
@@ -27,11 +27,11 @@ def __init__(self) -> None:
         self.credential = AzureKeyCredential(search_key)
 
         # Create cognitive search client
-        self.search_client = SearchClient(
-            endpoint=search_endpoint,
-            index_name=search_index,
-            credential=self.credential,
-        )
+        # self.search_client = SearchClient(
+        #     endpoint=search_endpoint,
+        #     index_name=search_index,
+        #     credential=self.credential,
+        # )
 
         # Create cosmos client
         self.cql = client.Client(
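With the search client disabled in the two patches above, app.py depends only on the Cosmos DB Gremlin endpoint. For reference, a minimal sketch of the connection the Transaction class builds with gremlin_python — the endpoint, database, and graph names mirror the values wired in above, while the GraphSON 2.0 serializer is an assumption based on what the Cosmos DB Gremlin API currently accepts:

    import os
    from gremlin_python.driver import client, serializer

    # Fall back to the defaults introduced in PATCH 01; prefer real env vars
    # so the master key stays out of source control.
    cosmos_endpoint = os.getenv("COSMOS_ENDPOINT", "wss://ontologypoc.gremlin.cosmos.azure.com:443/")
    cosmos_database = os.getenv("COSMOS_DATABASE", "ontology_nn")
    cosmos_graph_collection = os.getenv("COSMOS_GRAPH_COLLECTION", "graphnn7")
    cosmos_key = os.getenv("COSMOS_KEY", "<cosmos-key>")  # placeholder

    cql = client.Client(
        cosmos_endpoint,
        "g",
        # Cosmos DB expects the database/graph pair as the Gremlin username
        username=f"/dbs/{cosmos_database}/colls/{cosmos_graph_collection}",
        password=cosmos_key,
        message_serializer=serializer.GraphSONSerializersV2d0(),
    )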
From b5f13ae1fa7a984a60042d37867dcf096d949b18 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 09:54:25 +0530
Subject: [PATCH 03/17] Load_Airportsgraphdatasample.ipynb

---
 load_data/Load_Airportsgraphdatasample.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/load_data/Load_Airportsgraphdatasample.ipynb b/load_data/Load_Airportsgraphdatasample.ipynb
index e6350ad..582c531 100644
--- a/load_data/Load_Airportsgraphdatasample.ipynb
+++ b/load_data/Load_Airportsgraphdatasample.ipynb
@@ -1 +1 @@
-{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cc14dfef-7e6f-4167-99c4-2d52f66cdfdc","showTitle":false,"title":""}},"outputs":[],"source":["import os\n","import uuid\n","from array import array\n","from pyspark.sql import DataFrame\n","import pyspark.sql.functions as f\n","from pyspark.sql.types import StringType,BooleanType,StructType,StructField,IntegerType, DecimalType\n","from pyspark.sql.functions import lit\n","from decimal import Decimal\n","\n","f_uuid = f.udf(lambda: str(uuid.uuid4()), StringType())\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"f95f7542-927d-4be8-a85d-c1d5413f5ca1","showTitle":false,"title":""}},"outputs":[],"source":["cosmosEndpoint = \"https://xxxxxx.documents.azure.com:443/\"\n","cosmosMasterKey = \"*******\"\n","cosmosDatabaseName = \"*******\"\n","cosmosContainerName = \"*******\"\n","\n","cfg = {\n"," \"spark.cosmos.accountEndpoint\" : cosmosEndpoint,\n"," \"spark.cosmos.accountKey\" : cosmosMasterKey,\n"," \"spark.cosmos.database\" : cosmosDatabaseName,\n"," \"spark.cosmos.container\" : cosmosContainerName,\n","}\n","# Configure Catalog Api to be used\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog\", \"com.azure.cosmos.spark.CosmosCatalog\")\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog.spark.cosmos.accountEndpoint\", cosmosEndpoint)\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog.spark.cosmos.accountKey\", cosmosMasterKey)\n","spark.conf.set(\"spark.cosmos.throughputControl.enabled\",True)\n","spark.conf.set(\"spark.cosmos.throughputControl.targetThroughput\",20000)\n","\n","def write_to_cosmos_graph(df: DataFrame):\n"," \n"," df.write\\\n"," .format(\"cosmos.oltp\")\\\n"," .options(**cfg)\\\n"," .mode(\"Append\")\\\n"," .save()"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"2ff7cc9a-f1fd-46f8-bb1d-df35b0a0bbd0","showTitle":false,"title":""}},"outputs":[],"source":["def create_vertex_df(\n"," df: DataFrame,\n"," vertex_properties_col_name: list, partition_col: str,\n"," vertex_label: str,id: str\n","):\n"," columns = [id, partition_col,\"label\"]\n"," columns.extend(['nvl2({x}, array(named_struct(\"id\", uuid(), \"_value\", {x})), NULL) AS {x}'.format(x=x) for x in vertex_properties_col_name])\n"," if \"label\" in df.columns:\n"," df=df.withColumn(\"label\",df[vertex_label])\n"," else:\n"," df=df.withColumn(\"label\",f.lit(vertex_label))\n"," \n"," return df.selectExpr(*columns).withColumnRenamed(id,\"id\")\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bbcae6a8-9f97-4af2-b282-642670db3fdf","showTitle":false,"title":""}},"outputs":[],"source":["def create_edge_df(srcdf: DataFrame, destdf: DataFrame, label: str, partition_col: str, \n"," vertexidcol: str, sinkcol: str, sinklabel: str, vertexlabel: str, sinkpartitioncol: str,srcjoincol: str,destjoincol: str,isedgetable: bool):\n"," if(isedgetable):\n"," #we have edge table\n"," if(sinklabel in srcdf.columns):\n"," srcdf=srcdf.withColumn(\"_sinkLabel\",srcdf[sinklabel])\n"," else:\n"," srcdf=srcdf.withColumn(\"_sinkLabel\",f.lit(sinklabel))\n"," if(vertexlabel in srcdf.columns):\n"," srcdf=srcdf.withColumn(\"_vertexLabel\",srcdf[vertexlabel])\n"," else:\n"," srcdf=srcdf.withColumn(\"_vertexLabel\",f.lit(vertexlabel))\n"," srcdf=srcdf.selectExpr(\"_sinkLabel\",\"_vertexLabel\",srcjoincol,partition_col)\n"," destdf=destdf.selectExpr(label,destjoincol,vertexidcol,sinkcol,sinkpartitioncol)\n"," df=srcdf.join(destdf,srcdf[srcjoincol]==destdf[destjoincol],\"inner\")\n"," if(\"label\" in df.columns):\n"," df=df.withColumn(\"label\",df[label])\n"," else:\n"," df=df.withColumn(\"label\",f.lit(label))\n"," df=df.withColumn(\"_sink\",df[sinkcol]).withColumn(\"_sinkPartition\",df[sinkpartitioncol]).withColumn(\"_vertexId\",df[vertexidcol])\\\n"," .withColumn(\"id\",f_uuid()).withColumn(\"_isEdge\",f.lit(True))\n"," else:\n"," destdf=destdf.withColumn(\"_sink\",destdf[sinkcol]).withColumn(\"_sinkPartition\",destdf[sinkpartitioncol]).select(destjoincol,\"_sink\",\"_sinkPartition\")\n"," srcdf=srcdf.withColumn(\"_vertexId\",srcdf[vertexidcol]).select(srcjoincol,\"_vertexId\",partition_col)\n"," df=srcdf.join(destdf,srcdf[srcjoincol]==destdf[destjoincol],\"inner\")\n"," df=df.withColumn(\"label\",f.lit(label)).withColumn(\"id\",f_uuid()).withColumn(\"_sinkLabel\",f.lit(sinklabel))\\\n"," .withColumn(\"_vertexLabel\",f.lit(vertexlabel)).withColumn(\"_isEdge\",f.lit(True))\n"," \n"," columns=[\"label\",\"_sink\",\"_sinkLabel\",\"_vertexId\",\"_vertexLabel\",\"_isEdge\",\"_sinkPartition\",partition_col,\"id\"]\n"," return df.selectExpr(*columns)\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"23bc9460-9911-4b8e-ba06-7b2ace9234e5","showTitle":false,"title":""}},"outputs":[],"source":["#vertex_airroutes\n","import pandas as pd\n","df=spark.createDataFrame(pd.read_csv(\"https://raw.githubusercontent.com/krlawrence/graph/master/sample-data/air-routes-latest-nodes.csv\"))\n","\n","airroutes=df.withColumn(\"srno\",df[\"~id\"]).withColumnRenamed(\"~id\",\"id\").withColumnRenamed(\"~label\",\"label\").withColumnRenamed(\"code:string\",\"code\")\\\n"," .withColumnRenamed(\"desc:string\",\"desc\").withColumnRenamed(\"country:string\",\"country\").withColumnRenamed(\"city:string\",\"city\")\\\n"," .selectExpr(\"cast(srno as string) srno\",\"cast(id as string) id\",\"label\",\"code\",\"desc\",\"country\",\"city\")\n","\n","airroutes.show()\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"70199341-695b-4361-8dbd-113e531061b0","showTitle":false,"title":""}},"outputs":[],"source":["#edges_airroutes\n","import pandas as pd\n","df=spark.createDataFrame(pd.read_csv(\"https://raw.githubusercontent.com/krlawrence/graph/master/sample-data/air-routes-latest-edges.csv\"))\n","\n","airroutesedges=df.withColumn(\"srno\",df[\"~id\"]).withColumnRenamed(\"~id\",\"id\").withColumnRenamed(\"~label\",\"label\").withColumnRenamed(\"~from\",\"from\")\\\n"," .withColumnRenamed(\"~to\",\"to\").withColumnRenamed(\"dist:int\",\"dist\")\\\n"," .selectExpr(\"id\",\"cast(from as string) from\",\"cast(to as string) to\",\"label\",\"dist\",\"srno\")\n","\n","airroutesedges.show()\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cf8cd1dc-04b0-49fd-8bf0-9b27b0a3bf94","showTitle":false,"title":""}},"outputs":[],"source":["#Vertex\n","vertex_airroutes = create_vertex_df(\n"," df=airroutes,\n"," vertex_properties_col_name=[\"code\",\"desc\",\"country\",\"code\"],\n"," vertex_label = \"label\",id=\"id\",partition_col=\"srno\"\n",")\n","\n","vertex_airroutes.display()\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"83670e9e-a63b-4988-b547-a312ab133a2e","showTitle":false,"title":""}},"outputs":[],"source":["edges_airroutes=create_edge_df(airroutes,airroutesedges,\"label\",\"srno\",\"from\",\"to\",\"label\",\"label\",\"to\",\"srno\",\"from\",True)\n","\n","edges_airroutes.schema\n","\n","#edges_airroutes.show()"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"0db5fcf3-822f-4313-8782-9ee3eceddf66","showTitle":false,"title":""}},"outputs":[],"source":["#Write Vertex\n","write_to_cosmos_graph(vertex_airroutes)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"28a54f54-d08e-459e-ae60-6efdf74bcc23","showTitle":false,"title":""}},"outputs":[],"source":["#Write Edges\n","write_to_cosmos_graph(edges_airroutes)"]}],"metadata":{"application/vnd.databricks.v1+notebook":{"dashboards":[],"language":"python","notebookMetadata":{"pythonIndentUnit":2},"notebookName":"Airportsgraphdatasample","notebookOrigID":2336516133702252,"widgets":{}},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}
+{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cc14dfef-7e6f-4167-99c4-2d52f66cdfdc","showTitle":false,"title":""}},"outputs":[],"source":["import os\n","import uuid\n","from array import array\n","from pyspark.sql import DataFrame\n","import pyspark.sql.functions as f\n","from pyspark.sql.types import StringType,BooleanType,StructType,StructField,IntegerType, DecimalType\n","from pyspark.sql.functions import lit\n","from decimal import Decimal\n","\n","f_uuid = f.udf(lambda: str(uuid.uuid4()), StringType())\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"f95f7542-927d-4be8-a85d-c1d5413f5ca1","showTitle":false,"title":""}},"outputs":[],"source":["cosmosEndpoint = \"https://ontologypoc.documents.azure.com:443/\"\n","cosmosMasterKey = \"RcyO8fytL4FX7s8Lo9ZejRhvLwXLjN0Kp9GCUHXKTeyBLBuwrAPoAfDDBLPuoEh0jrqBMtBXbCw4ACDblJYPqg==\"\n","cosmosDatabaseName = \"ontology_nn\"\n","cosmosContainerName = \"graphnn7\"\n","\n","cfg = {\n"," \"spark.cosmos.accountEndpoint\" : cosmosEndpoint,\n"," \"spark.cosmos.accountKey\" : cosmosMasterKey,\n"," \"spark.cosmos.database\" : cosmosDatabaseName,\n"," \"spark.cosmos.container\" : cosmosContainerName,\n","}\n","# Configure Catalog Api to be used\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog\", \"com.azure.cosmos.spark.CosmosCatalog\")\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog.spark.cosmos.accountEndpoint\", cosmosEndpoint)\n","spark.conf.set(\"spark.sql.catalog.cosmosCatalog.spark.cosmos.accountKey\", cosmosMasterKey)\n","spark.conf.set(\"spark.cosmos.throughputControl.enabled\",True)\n","spark.conf.set(\"spark.cosmos.throughputControl.targetThroughput\",20000)\n","\n","def write_to_cosmos_graph(df: DataFrame):\n"," \n"," df.write\\\n"," .format(\"cosmos.oltp\")\\\n"," .options(**cfg)\\\n"," .mode(\"Append\")\\\n"," .save()"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"2ff7cc9a-f1fd-46f8-bb1d-df35b0a0bbd0","showTitle":false,"title":""}},"outputs":[],"source":["def create_vertex_df(\n"," df: DataFrame,\n"," vertex_properties_col_name: list, partition_col: str,\n"," vertex_label: str,id: str\n","):\n"," columns = [id, partition_col,\"label\"]\n"," columns.extend(['nvl2({x}, array(named_struct(\"id\", uuid(), \"_value\", {x})), NULL) AS {x}'.format(x=x) for x in vertex_properties_col_name])\n"," if \"label\" in df.columns:\n"," df=df.withColumn(\"label\",df[vertex_label])\n"," else:\n"," df=df.withColumn(\"label\",f.lit(vertex_label))\n"," \n"," return df.selectExpr(*columns).withColumnRenamed(id,\"id\")\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"bbcae6a8-9f97-4af2-b282-642670db3fdf","showTitle":false,"title":""}},"outputs":[],"source":["def create_edge_df(srcdf: DataFrame, destdf: DataFrame, label: str, partition_col: str, \n"," vertexidcol: str, sinkcol: str, sinklabel: str, vertexlabel: str, sinkpartitioncol: str,srcjoincol: str,destjoincol: str,isedgetable: bool):\n"," if(isedgetable):\n"," #we have edge table\n"," if(sinklabel in srcdf.columns):\n"," srcdf=srcdf.withColumn(\"_sinkLabel\",srcdf[sinklabel])\n"," else:\n"," srcdf=srcdf.withColumn(\"_sinkLabel\",f.lit(sinklabel))\n"," if(vertexlabel in srcdf.columns):\n"," srcdf=srcdf.withColumn(\"_vertexLabel\",srcdf[vertexlabel])\n"," else:\n"," srcdf=srcdf.withColumn(\"_vertexLabel\",f.lit(vertexlabel))\n"," srcdf=srcdf.selectExpr(\"_sinkLabel\",\"_vertexLabel\",srcjoincol,partition_col)\n"," destdf=destdf.selectExpr(label,destjoincol,vertexidcol,sinkcol,sinkpartitioncol)\n"," df=srcdf.join(destdf,srcdf[srcjoincol]==destdf[destjoincol],\"inner\")\n"," if(\"label\" in df.columns):\n"," df=df.withColumn(\"label\",df[label])\n"," else:\n"," df=df.withColumn(\"label\",f.lit(label))\n"," df=df.withColumn(\"_sink\",df[sinkcol]).withColumn(\"_sinkPartition\",df[sinkpartitioncol]).withColumn(\"_vertexId\",df[vertexidcol])\\\n"," .withColumn(\"id\",f_uuid()).withColumn(\"_isEdge\",f.lit(True))\n"," else:\n"," destdf=destdf.withColumn(\"_sink\",destdf[sinkcol]).withColumn(\"_sinkPartition\",destdf[sinkpartitioncol]).select(destjoincol,\"_sink\",\"_sinkPartition\")\n"," srcdf=srcdf.withColumn(\"_vertexId\",srcdf[vertexidcol]).select(srcjoincol,\"_vertexId\",partition_col)\n"," df=srcdf.join(destdf,srcdf[srcjoincol]==destdf[destjoincol],\"inner\")\n"," df=df.withColumn(\"label\",f.lit(label)).withColumn(\"id\",f_uuid()).withColumn(\"_sinkLabel\",f.lit(sinklabel))\\\n"," .withColumn(\"_vertexLabel\",f.lit(vertexlabel)).withColumn(\"_isEdge\",f.lit(True))\n"," \n"," columns=[\"label\",\"_sink\",\"_sinkLabel\",\"_vertexId\",\"_vertexLabel\",\"_isEdge\",\"_sinkPartition\",partition_col,\"id\"]\n"," return df.selectExpr(*columns)\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"23bc9460-9911-4b8e-ba06-7b2ace9234e5","showTitle":false,"title":""}},"outputs":[],"source":["#vertex_airroutes\n","import pandas as pd\n","df=spark.createDataFrame(pd.read_csv(\"https://raw.githubusercontent.com/krlawrence/graph/master/sample-data/air-routes-latest-nodes.csv\"))\n","\n","airroutes=df.withColumn(\"srno\",df[\"~id\"]).withColumnRenamed(\"~id\",\"id\").withColumnRenamed(\"~label\",\"label\").withColumnRenamed(\"code:string\",\"code\")\\\n"," .withColumnRenamed(\"desc:string\",\"desc\").withColumnRenamed(\"country:string\",\"country\").withColumnRenamed(\"city:string\",\"city\")\\\n"," .selectExpr(\"cast(srno as string) srno\",\"cast(id as string) id\",\"label\",\"code\",\"desc\",\"country\",\"city\")\n","\n","airroutes.show()\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"70199341-695b-4361-8dbd-113e531061b0","showTitle":false,"title":""}},"outputs":[],"source":["#edges_airroutes\n","import pandas as pd\n","df=spark.createDataFrame(pd.read_csv(\"https://raw.githubusercontent.com/krlawrence/graph/master/sample-data/air-routes-latest-edges.csv\"))\n","\n","airroutesedges=df.withColumn(\"srno\",df[\"~id\"]).withColumnRenamed(\"~id\",\"id\").withColumnRenamed(\"~label\",\"label\").withColumnRenamed(\"~from\",\"from\")\\\n"," .withColumnRenamed(\"~to\",\"to\").withColumnRenamed(\"dist:int\",\"dist\")\\\n"," .selectExpr(\"id\",\"cast(from as string) from\",\"cast(to as string) to\",\"label\",\"dist\",\"srno\")\n","\n","airroutesedges.show()\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"cf8cd1dc-04b0-49fd-8bf0-9b27b0a3bf94","showTitle":false,"title":""}},"outputs":[],"source":["#Vertex\n","vertex_airroutes = create_vertex_df(\n"," df=airroutes,\n"," vertex_properties_col_name=[\"code\",\"desc\",\"country\",\"city\"],\n"," vertex_label = \"label\",id=\"id\",partition_col=\"srno\"\n",")\n","\n","vertex_airroutes.display()\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"83670e9e-a63b-4988-b547-a312ab133a2e","showTitle":false,"title":""}},"outputs":[],"source":["edges_airroutes=create_edge_df(airroutes,airroutesedges,\"label\",\"srno\",\"from\",\"to\",\"label\",\"label\",\"to\",\"srno\",\"from\",True)\n","\n","edges_airroutes.schema\n","\n","#edges_airroutes.show()"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"0db5fcf3-822f-4313-8782-9ee3eceddf66","showTitle":false,"title":""}},"outputs":[],"source":["#Write Vertex\n","write_to_cosmos_graph(vertex_airroutes)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"inputWidgets":{},"nuid":"28a54f54-d08e-459e-ae60-6efdf74bcc23","showTitle":false,"title":""}},"outputs":[],"source":["#Write Edges\n","write_to_cosmos_graph(edges_airroutes)"]}],"metadata":{"application/vnd.databricks.v1+notebook":{"dashboards":[],"language":"python","notebookMetadata":{"pythonIndentUnit":2},"notebookName":"Airportsgraphdatasample","notebookOrigID":2336516133702252,"widgets":{}},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}

From fe24b180bac17a76db1f3ae9c39ad0972d7b5369 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 09:56:48 +0530
Subject: [PATCH 04/17] Load_Bank_transact_data.ipynb

---
 load_data/Load_Bank_transact_data.ipynb | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb
index 8408e1b..b4c3b21 100644
--- a/load_data/Load_Bank_transact_data.ipynb
+++ b/load_data/Load_Bank_transact_data.ipynb
@@ -22,10 +22,10 @@
    "cell_type": "code",
    "source": [
     "# update the values from your infra\r\n",
-    "cosmosEndpoint = \"https://cosmos-7w6s3xqb4piyi.documents.azure.com:443/\"\r\n",
-    "cosmosMasterKey = \"cosmos-key\" \r\n",
-    "cosmosDatabaseName = \"database01\"\r\n",
-    "cosmosContainerName = \"graph01\""
+    "cosmosEndpoint = \"https://ontologypoc.documents.azure.com:443/\"\r\n",
+    "cosmosMasterKey = \"RcyO8fytL4FX7s8Lo9ZejRhvLwXLjN0Kp9GCUHXKTeyBLBuwrAPoAfDDBLPuoEh0jrqBMtBXbCw4ACDblJYPqg==\" \r\n",
+    "cosmosDatabaseName = \"ontology_nn\"\r\n",
+    "cosmosContainerName = \"graphnn7\""
 ],
 "outputs": [],
 "execution_count": null,
@@ -297,4 +297,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
\ No newline at end of file
+}
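Both notebooks above write through the Cosmos DB Spark OLTP connector, with throughput control capped at 20,000 RU/s. A quick way to verify a load is to read the same container back through the connector — a sketch meant to run in the same Spark session, reusing the cfg dictionary the notebook defines; the inferSchema option name is the connector's documented read option:

    # Read vertices and edges back from the graph container as a sanity check.
    verify_df = (
        spark.read.format("cosmos.oltp")
        .options(**cfg)
        .option("spark.cosmos.read.inferSchema.enabled", "true")
        .load()
    )
    print(verify_df.count())  # should match the number of vertices + edges written
    verify_df.show(5)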
From 70393cd5b57fc14fa8fdc1ec59b04c5d45e06cf0 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 10:00:29 +0530
Subject: [PATCH 05/17] app.py

---
 visualize/app.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/visualize/app.py b/visualize/app.py
index 03743ac..a824e1d 100644
--- a/visualize/app.py
+++ b/visualize/app.py
@@ -24,7 +24,7 @@ class Transaction:
     def __init__(self) -> None:
         # Limit the max number of results returned by a raw gremlin query to avoid excess RU's and timeouts
         self.GREMLIN_QUERY_LIMIT = 100
-        self.credential = AzureKeyCredential(search_key)
+        # self.credential = AzureKeyCredential(search_key)
 
         # Create cognitive search client
         # self.search_client = SearchClient(
@@ -133,14 +133,14 @@ def execute_gremlin_query(self, query: str) -> None:
             st.error(e)
 
     # Execute Azure search to find accounts either sent or received
-    def execute_search(self, search_text: str, filter=None) -> None:
-        accountId_list = []
-        response = self.search_client.search(
-            search_text=search_text,
-            include_total_count=True,
-            filter=filter,
-            search_fields=["sink", "vertexId"],
-        )
-        for r in response:
-            accountId_list.append(r["vertexId"])
-            accountId_list.append(r["sink"])
+    # def execute_search(self, search_text: str, filter=None) -> None:
+    #     accountId_list = []
+    #     response = self.search_client.search(
+    #         search_text=search_text,
+    #         include_total_count=True,
+    #         filter=filter,
+    #         search_fields=["sink", "vertexId"],
+    #     )
+    #     for r in response:
+    #         accountId_list.append(r["vertexId"])
+    #         accountId_list.append(r["sink"])

From 0ad3d6b17a1f2a6d9d5906546f8c57ebef6462bd Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 11:10:17 +0530
Subject: [PATCH 06/17] dashboard-deploy.yml

---
 .github/workflows/dashboard-deploy.yml | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/dashboard-deploy.yml b/.github/workflows/dashboard-deploy.yml
index 08d102c..6a91d21 100644
--- a/.github/workflows/dashboard-deploy.yml
+++ b/.github/workflows/dashboard-deploy.yml
@@ -8,16 +8,16 @@ on:
       - .github/workflows/dashboard-deploy.yml
 
 env:
-  RESOURCE_GROUP: "ssatt-dev-rg"
-  COSMOS_ACCOUNT_NAME: "ebcbin5oofjcs"
-  COSMOS_DB_NAME: "database01"
-  COSMOS_GRAPH_COLLECTION: "graph01"
-  SEARCH_RESOURCE_GROUP: "demo-rg-01"
-  SEARCH_SERVICE_NAME: "ssattiraju-search-01"
-  AZURE_CONTAINER_REGISTRY: "ebcbin5oofjcs.azurecr.io"
-  SEARCH_INDEX: "cosmosdb-index"
-  STREAMLIT_SERVER_PORT: 8888
-  STREAMLIT_SERVER_HEADLESS: "true"
+  RESOURCE_GROUP: "cosmosdb_ontologypoc_manish"
+  COSMOS_ACCOUNT_NAME: "ontologypoc"
+  COSMOS_DB_NAME: "ontology_nn"
+  COSMOS_GRAPH_COLLECTION: "graphnn7"
+  #SEARCH_RESOURCE_GROUP: "demo-rg-01"
+  #SEARCH_SERVICE_NAME: "ssattiraju-search-01"
+  AZURE_CONTAINER_REGISTRY: "ontologypoc.azurecr.io"
+  #SEARCH_INDEX: "cosmosdb-index"
+  #STREAMLIT_SERVER_PORT: 8888
+  #STREAMLIT_SERVER_HEADLESS: "true"
 
 jobs:
   dashboard-deploy:
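PATCH 05 and PATCH 06 retire the Azure AI Search path end to end: the client construction, the execute_search helper, and the SEARCH_* workflow variables. Should that path ever be restored, the commented-out pieces reassemble roughly as below; the endpoint, index name, and key are placeholders, while the rest is taken directly from the code being removed:

    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    search_client = SearchClient(
        endpoint="https://<search-service>.search.windows.net",  # placeholder
        index_name="cosmosdb-index",                             # placeholder
        credential=AzureKeyCredential("<search-key>"),           # placeholder
    )

    def execute_search(search_text: str, filter=None) -> list:
        # Collect accounts that either sent or received a matching transaction
        account_ids = []
        response = search_client.search(
            search_text=search_text,
            include_total_count=True,
            filter=filter,
            search_fields=["sink", "vertexId"],
        )
        for r in response:
            account_ids.append(r["vertexId"])
            account_ids.append(r["sink"])
        return account_ids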
From 84b48d1b1ed45901d644976acd3764e345443aac Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 11:12:48 +0530
Subject: [PATCH 07/17] infra-deploy.yml

---
 .github/workflows/infra-deploy.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/infra-deploy.yml b/.github/workflows/infra-deploy.yml
index 38985a7..9523849 100644
--- a/.github/workflows/infra-deploy.yml
+++ b/.github/workflows/infra-deploy.yml
@@ -8,8 +8,8 @@ on:
       - .github/workflows/infra-deploy.yml
 
 env:
-  AZURE_SUBSCRIPTION_ID: "7c1d967f-37f1-4047-bef7-05af9aa80fe2"
-  AZURE_LOCATION: "southeastasia"
+  AZURE_SUBSCRIPTION_ID: "c58a65e8-e2c6-4779-948c-29fdccfd9eba"
+  AZURE_LOCATION: "East US 2"
 
 jobs:
   validation:

From 67dea2c2312faede8bc8c07d9b78853bceb3d497 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 11:20:17 +0530
Subject: [PATCH 08/17] Load_Bank_transact_data.ipynb

---
 load_data/Load_Bank_transact_data.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb
index b4c3b21..3594f72 100644
--- a/load_data/Load_Bank_transact_data.ipynb
+++ b/load_data/Load_Bank_transact_data.ipynb
@@ -45,7 +45,7 @@
    "cell_type": "code",
    "source": [
     "# update csv file path based on your infra\n",
-    "df = spark.read.load('abfss://data@7w6s3xqb4piyi.dfs.core.windows.net/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n",
+    "df = spark.read.load('https://owl2jsonpocvtrb.blob.core.windows.net/owl2jsonpoc', format='csv',header=True)\n",
     "display(df.limit(10))"
 ],
 "outputs": [],

From 668fde8daa2bc07c68d6ec382b7a6faa856a545a Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 11:27:41 +0530
Subject: [PATCH 09/17] Load_Bank_transact_data.ipynb

---
 load_data/Load_Bank_transact_data.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb
index 3594f72..9c8f719 100644
--- a/load_data/Load_Bank_transact_data.ipynb
+++ b/load_data/Load_Bank_transact_data.ipynb
@@ -45,7 +45,7 @@
    "cell_type": "code",
    "source": [
     "# update csv file path based on your infra\n",
-    "df = spark.read.load('https://owl2jsonpocvtrb.blob.core.windows.net/owl2jsonpoc', format='csv',header=True)\n",
+    "df = spark.read.load('https://owl2jsonpocvtrb.blob.core.windows.net/owl2jsonpoc/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n",
     "display(df.limit(10))"
 ],
 "outputs": [],

From c083f9bfd3ec4188eb124030d37d7859f468697e Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 11:47:03 +0530
Subject: [PATCH 10/17] Load_Bank_transact_data.ipynb

---
 load_data/Load_Bank_transact_data.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb
index 9c8f719..f4fe4a6 100644
--- a/load_data/Load_Bank_transact_data.ipynb
+++ b/load_data/Load_Bank_transact_data.ipynb
@@ -45,7 +45,7 @@
    "cell_type": "code",
    "source": [
     "# update csv file path based on your infra\n",
-    "df = spark.read.load('https://owl2jsonpocvtrb.blob.core.windows.net/owl2jsonpoc/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n",
+    "df = spark.read.load('https://novonordisk-my.sharepoint.com/:x:/r/personal/vtrb_novonordisk_com/Documents/Documents/PS_20174392719_1491204439457_log.csv?d=w78fdd8a4ac2946fa935934cce33311cb&csf=1&web=1&e=SI8dL4', format='csv',header=True)\n",
     "display(df.limit(10))"
 ],
 "outputs": [],

From f067376df3622ba01a84f96bedf99fbe00526f04 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 11:53:08 +0530
Subject: [PATCH 11/17] Load_Bank_transact_data.ipynb

---
 load_data/Load_Bank_transact_data.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb
index f4fe4a6..ee4c01d 100644
--- a/load_data/Load_Bank_transact_data.ipynb
+++ b/load_data/Load_Bank_transact_data.ipynb
@@ -45,7 +45,7 @@
    "cell_type": "code",
    "source": [
     "# update csv file path based on your infra\n",
-    "df = spark.read.load('https://novonordisk-my.sharepoint.com/:x:/r/personal/vtrb_novonordisk_com/Documents/Documents/PS_20174392719_1491204439457_log.csv?d=w78fdd8a4ac2946fa935934cce33311cb&csf=1&web=1&e=SI8dL4', format='csv',header=True)\n",
+    "df = spark.read.load('https://drive.google.com/file/d/1Mt2yIE9e-Zj_3mRDid8Iqiq5j3Gp5w9R/view?usp=sharing', format='csv',header=True)\n",
     "display(df.limit(10))"
 ],
 "outputs": [],

From 2a71dce27f5b28a70fb45398a0c23d2e226a16f3 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 11:59:18 +0530
Subject: [PATCH 12/17] Load_Bank_transact_data.ipynb

---
 load_data/Load_Bank_transact_data.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb
index ee4c01d..b4c3b21 100644
--- a/load_data/Load_Bank_transact_data.ipynb
+++ b/load_data/Load_Bank_transact_data.ipynb
@@ -45,7 +45,7 @@
    "cell_type": "code",
    "source": [
     "# update csv file path based on your infra\n",
-    "df = spark.read.load('https://drive.google.com/file/d/1Mt2yIE9e-Zj_3mRDid8Iqiq5j3Gp5w9R/view?usp=sharing', format='csv',header=True)\n",
+    "df = spark.read.load('abfss://data@7w6s3xqb4piyi.dfs.core.windows.net/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n",
     "display(df.limit(10))"
 ],
 "outputs": [],

From d94f06918a28edc7d874499d78670263db47a901 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 14:21:40 +0530
Subject: [PATCH 13/17] Load_Bank_transact_data.ipynb

---
 load_data/Load_Bank_transact_data.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb
index b4c3b21..e9f148e 100644
--- a/load_data/Load_Bank_transact_data.ipynb
+++ b/load_data/Load_Bank_transact_data.ipynb
@@ -45,7 +45,7 @@
    "cell_type": "code",
    "source": [
     "# update csv file path based on your infra\n",
-    "df = spark.read.load('abfss://data@7w6s3xqb4piyi.dfs.core.windows.net/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n",
+    "df = spark.read.load('ManishRamana123/cosmosdb-graph-demo/load_data/data/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n",
     "display(df.limit(10))"
 ],
 "outputs": [],
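The commits above cycle the CSV path through abfss://, plain https://, SharePoint, Google Drive, and a bare GitHub repo path. Of these, only the Hadoop filesystem schemes (abfss:// for ADLS Gen2, wasbs:// for classic blob storage) are URIs spark.read.load can open; https, SharePoint, and Drive links have no registered Spark filesystem, and the share links return HTML rather than CSV in any case. The blob account the next patch settles on reads cleanly via wasbs:// — a sketch for the same Spark session, with the storage key as a placeholder and assuming the hadoop-azure driver is available on the cluster:

    # Authorize the storage account for this session, then read via wasbs://.
    storage_account = "owl2jsonmanish"
    container = "cosmosdbgp"
    spark.conf.set(
        f"fs.azure.account.key.{storage_account}.blob.core.windows.net",
        "<storage-account-key>",  # placeholder; keep keys out of notebooks
    )
    df = spark.read.load(
        f"wasbs://{container}@{storage_account}.blob.core.windows.net/"
        "PS_20174392719_1491204439457_log.csv",
        format="csv",
        header=True,
    )
    display(df.limit(10))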
From 23a155ad30f7284ff57da717fd2da25cc242b45a Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 14:29:52 +0530
Subject: [PATCH 14/17] Load_Bank_transact_data.ipynb

---
 load_data/Load_Bank_transact_data.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/load_data/Load_Bank_transact_data.ipynb b/load_data/Load_Bank_transact_data.ipynb
index e9f148e..b495164 100644
--- a/load_data/Load_Bank_transact_data.ipynb
+++ b/load_data/Load_Bank_transact_data.ipynb
@@ -45,7 +45,7 @@
    "cell_type": "code",
    "source": [
     "# update csv file path based on your infra\n",
-    "df = spark.read.load('ManishRamana123/cosmosdb-graph-demo/load_data/data/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n",
+    "df = spark.read.load('wasbs://cosmosdbgp@owl2jsonmanish.blob.core.windows.net/PS_20174392719_1491204439457_log.csv', format='csv',header=True)\n",
     "display(df.limit(10))"
 ],
 "outputs": [],

From c385e17fa595c220a7c133ab9645a293958ee8b8 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Thu, 14 Mar 2024 17:29:23 +0530
Subject: [PATCH 15/17] Rename PS_20174392719_1491204439457_log.csv to
 PS_20174392719_14912041439457_log.csv

---
 ...491204439457_log.csv => PS_20174392719_14912041439457_log.csv} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename load_data/data/{PS_20174392719_1491204439457_log.csv => PS_20174392719_14912041439457_log.csv} (100%)

diff --git a/load_data/data/PS_20174392719_1491204439457_log.csv b/load_data/data/PS_20174392719_14912041439457_log.csv
similarity index 100%
rename from load_data/data/PS_20174392719_1491204439457_log.csv
rename to load_data/data/PS_20174392719_14912041439457_log.csv

From 4631ea3a95b5936dce38cda8ccf4b3fb3cdf4944 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Fri, 15 Mar 2024 14:50:44 +0530
Subject: [PATCH 16/17] Update PS_20174392719_14912041439457_log.csv

---
 load_data/data/PS_20174392719_14912041439457_log.csv | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/load_data/data/PS_20174392719_14912041439457_log.csv b/load_data/data/PS_20174392719_14912041439457_log.csv
index 1a253be..8b13789 100644
--- a/load_data/data/PS_20174392719_14912041439457_log.csv
+++ b/load_data/data/PS_20174392719_14912041439457_log.csv
@@ -1,3 +1 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:16910f90577b0d981bf8ff289714510bb89bc71bff7d3f220f024e287e4eea6b
-size 493534783
+

From 44490274bec170a318575a39e7121fce708fe5c6 Mon Sep 17 00:00:00 2001
From: ManishRamana123 <163375506+ManishRamana123@users.noreply.github.com>
Date: Fri, 15 Mar 2024 16:27:24 +0530
Subject: [PATCH 17/17] PS_20174392719_14912041439457_log.csv

---
 load_data/data/PS_20174392719_14912041439457_log.csv | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/load_data/data/PS_20174392719_14912041439457_log.csv b/load_data/data/PS_20174392719_14912041439457_log.csv
index 8b13789..5290a04 100644
--- a/load_data/data/PS_20174392719_14912041439457_log.csv
+++ b/load_data/data/PS_20174392719_14912041439457_log.csv
@@ -1 +1,4 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16910f90577b0d981bf8ff289714510bb89bc71bff7d3f220f024e287e4eea6b
+size 493534783
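The final three commits rename the sample CSV and briefly blank out, then restore, its Git LFS pointer, so a clone without git-lfs installed (or without running git lfs pull) sees the three-line pointer stub instead of the ~493 MB data file. A small guard worth running before handing the path to Spark — a sketch, using the renamed path from PATCH 15:

    from pathlib import Path

    def is_lfs_pointer(path: str) -> bool:
        # An LFS pointer is a tiny text stub that starts with this version line
        head = Path(path).open("rb").read(64)
        return head.startswith(b"version https://git-lfs.github.com/spec/v1")

    csv_path = "load_data/data/PS_20174392719_14912041439457_log.csv"
    if is_lfs_pointer(csv_path):
        raise RuntimeError(f"{csv_path} is an LFS pointer; run 'git lfs pull' first")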