{
  "name": "KA Ingestion",
  "nodes": [
    {
      "parameters": {},
      "id": "d06a0df7-7b30-4c47-a961-65db9db333ad",
      "name": "Start Ingestion",
      "type": "n8n-nodes-base.manualTrigger",
      "typeVersion": 1,
      "position": [
        240,
        304
      ]
    },
    {
      "parameters": {
        "jsCode": "// Root of the raw-file view of the GitLab handbook repository (main branch).\nconst HANDBOOK_RAW_ROOT = 'https://gitlab.com/gitlab-com/content-sites/handbook/-/raw/main/content/handbook/';\n\n// Handbook pages to ingest, as paths relative to HANDBOOK_RAW_ROOT.\nconst handbookPages = [\n  'values/_index.md',\n  'communication/_index.md',\n  'people-group/general-onboarding/_index.md',\n  'total-rewards/_index.md',\n  'people-group/offboarding/_index.md',\n  'tools-and-tips/_index.md',\n  'leadership/_index.md',\n  'company/culture/_index.md'\n];\n\n// Build one n8n item per page: url is the absolute raw-file address, source is the relative path.\nconst items = [];\nfor (const page of handbookPages) {\n  items.push({ json: { url: HANDBOOK_RAW_ROOT + page, source: page } });\n}\nreturn items;"
      },
      "id": "c48e8048-159d-46f0-8d55-8ee733f1eb65",
      "name": "Corpus File List",
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        464,
        304
      ]
    },
    {
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {
          "response": {
            "response": {
              "responseFormat": "text"
            }
          },
          "timeout": 30000
        }
      },
      "id": "73679a84-e3e5-4064-ad54-48d6d2c96f19",
      "name": "Fetch Document File",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.4,
      "position": [
        688,
        304
      ],
      "retryOnFail": true,
      "maxTries": 3,
      "waitBetweenTries": 2000
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "doc-content",
              "name": "content",
              "value": "={{ $json.data }}",
              "type": "string"
            },
            {
              "id": "doc-source",
              "name": "source",
              "value": "={{ $('Corpus File List').item.json.source }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "6d687380-ac1b-469d-949f-4095938853f0",
      "name": "Prepare Document",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        912,
        304
      ]
    },
    {
      "parameters": {
        "mode": "insert",
        "tableName": "document_vectors",
        "embeddingBatchSize": 50,
        "options": {}
      },
      "id": "2987539b-d34e-475e-9af4-7fddca28cb91",
      "name": "Insert Into Document Vectors",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePGVector",
      "typeVersion": 1.3,
      "position": [
        1152,
        304
      ]
    },
    {
      "parameters": {
        "options": {}
      },
      "id": "32d43211-3bac-4a9f-a844-2d2b6fbc78a0",
      "name": "OpenAI Embeddings",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "typeVersion": 1.2,
      "position": [
        1072,
        528
      ]
    },
    {
      "parameters": {
        "jsonMode": "expressionData",
        "jsonData": "={{ $('Prepare Document').item.json.content }}",
        "textSplittingMode": "custom",
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "source",
                "value": "={{ $('Prepare Document').item.json.source }}"
              }
            ]
          }
        }
      },
      "id": "dd7e4dfb-9e9f-4224-85ca-2838301c68e3",
      "name": "Document Loader",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "typeVersion": 1.1,
      "position": [
        1232,
        528
      ]
    },
    {
      "parameters": {
        "chunkSize": 800,
        "chunkOverlap": 100,
        "options": {
          "splitCode": "markdown"
        }
      },
      "id": "1ff80417-42ff-4480-b3aa-51f343c0a72f",
      "name": "Chunk Splitter",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "typeVersion": 1,
      "position": [
        1312,
        640
      ]
    },
    {
      "parameters": {
        "resource": "fileFolder",
        "searchMethod": "query",
        "returnAll": true,
        "filter": {
          "folderId": {
            "__rl": true,
            "mode": "id",
            "value": "REPLACE-WITH-YOUR-DRIVE-FOLDER-ID"
          },
          "whatToSearch": "files"
        },
        "options": {}
      },
      "id": "0fbe9b43-8243-4826-b85d-7f881dc54834",
      "name": "List Drive Folder",
      "type": "n8n-nodes-base.googleDrive",
      "typeVersion": 3,
      "position": [
        464,
        48
      ]
    },
    {
      "parameters": {
        "operation": "download",
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $json.id }}"
        },
        "options": {
          "binaryPropertyName": "data",
          "googleFileConversion": {
            "conversion": {
              "docsToFormat": "text/markdown"
            }
          }
        }
      },
      "id": "e81ff57b-97b0-4524-9c20-abd176e4f1b2",
      "name": "Download Drive File",
      "type": "n8n-nodes-base.googleDrive",
      "typeVersion": 3,
      "position": [
        688,
        48
      ]
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict",
            "version": 1
          },
          "conditions": [
            {
              "id": "mime-pdf",
              "leftValue": "={{ $binary.data.mimeType }}",
              "rightValue": "application/pdf",
              "operator": {
                "type": "string",
                "operation": "equals"
              }
            }
          ],
          "combinator": "and"
        },
        "options": {}
      },
      "id": "9ac24f8a-f5b5-40aa-9d8e-a6cd9fdcef3c",
      "name": "Is PDF?",
      "type": "n8n-nodes-base.if",
      "typeVersion": 2.3,
      "position": [
        912,
        48
      ]
    },
    {
      "parameters": {
        "operation": "pdf",
        "options": {
          "joinPages": true
        }
      },
      "id": "88717212-f02e-4335-83fc-cbc5c9b16348",
      "name": "Extract PDF Text",
      "type": "n8n-nodes-base.extractFromFile",
      "typeVersion": 1.1,
      "position": [
        1120,
        -80
      ]
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "dp-content",
              "name": "content",
              "value": "={{ $json.text }}",
              "type": "string"
            },
            {
              "id": "dp-source",
              "name": "source",
              "value": "={{ $('Download Drive File').item.json.name }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "2b6333e1-0348-40a3-b906-d2c639ef0b52",
      "name": "Prepare Drive PDF",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        1344,
        -80
      ]
    },
    {
      "parameters": {
        "mode": "insert",
        "tableName": "document_vectors",
        "embeddingBatchSize": 50,
        "options": {}
      },
      "id": "ad69cefc-d9d9-4dd0-a078-444b2a4fafa3",
      "name": "Insert Drive PDFs",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePGVector",
      "typeVersion": 1.3,
      "position": [
        1568,
        -80
      ],
      "onError": "continueRegularOutput"
    },
    {
      "parameters": {
        "options": {}
      },
      "id": "234e51b1-f2d6-4dfe-af30-cd15cb56db5d",
      "name": "OpenAI Embeddings (PDF)",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "typeVersion": 1.2,
      "position": [
        1488,
        -304
      ]
    },
    {
      "parameters": {
        "jsonMode": "expressionData",
        "jsonData": "={{ $('Prepare Drive PDF').item.json.content }}",
        "textSplittingMode": "custom",
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "source",
                "value": "={{ $('Prepare Drive PDF').item.json.source }}"
              }
            ]
          }
        }
      },
      "id": "0dda03b2-79a5-4cd9-9a9e-7d90286fee27",
      "name": "Document Loader (PDF text)",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "typeVersion": 1.1,
      "position": [
        1648,
        -304
      ]
    },
    {
      "parameters": {
        "chunkSize": 800,
        "chunkOverlap": 100,
        "options": {}
      },
      "id": "b389eb39-9905-4493-979f-fd00eb7d56c5",
      "name": "Chunk Splitter (PDF)",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "typeVersion": 1,
      "position": [
        1728,
        -176
      ]
    },
    {
      "parameters": {
        "mode": "insert",
        "tableName": "document_vectors",
        "embeddingBatchSize": 50,
        "options": {}
      },
      "id": "e67bed90-95f1-4ae0-85b5-ee7f017348de",
      "name": "Insert Drive Documents",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePGVector",
      "typeVersion": 1.3,
      "position": [
        1120,
        128
      ],
      "onError": "continueRegularOutput"
    },
    {
      "parameters": {
        "options": {}
      },
      "id": "28471ab0-3ddb-4c67-8b7e-4653651fc09b",
      "name": "OpenAI Embeddings (Drive)",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "typeVersion": 1.2,
      "position": [
        1040,
        288
      ]
    },
    {
      "parameters": {
        "dataType": "binary",
        "textSplittingMode": "custom",
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "source",
                "value": "={{ $('Download Drive File').item.json.name }}"
              }
            ]
          }
        }
      },
      "id": "b4152718-87ac-430b-a91c-4091ded3e5ad",
      "name": "Document Loader (Drive, binary)",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "typeVersion": 1.1,
      "position": [
        1200,
        288
      ]
    },
    {
      "parameters": {
        "chunkSize": 800,
        "chunkOverlap": 100,
        "options": {
          "splitCode": "markdown"
        }
      },
      "id": "90a74e81-8b36-491c-bc1d-c0a8db547149",
      "name": "Chunk Splitter (Drive)",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "typeVersion": 1,
      "position": [
        1280,
        208
      ]
    }
  ],
  "connections": {
    "Start Ingestion": {
      "main": [
        [
          {
            "node": "Corpus File List",
            "type": "main",
            "index": 0
          },
          {
            "node": "List Drive Folder",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Corpus File List": {
      "main": [
        [
          {
            "node": "Fetch Document File",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Fetch Document File": {
      "main": [
        [
          {
            "node": "Prepare Document",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Prepare Document": {
      "main": [
        [
          {
            "node": "Insert Into Document Vectors",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Embeddings": {
      "ai_embedding": [
        [
          {
            "node": "Insert Into Document Vectors",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Document Loader": {
      "ai_document": [
        [
          {
            "node": "Insert Into Document Vectors",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Chunk Splitter": {
      "ai_textSplitter": [
        [
          {
            "node": "Document Loader",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "List Drive Folder": {
      "main": [
        [
          {
            "node": "Download Drive File",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Download Drive File": {
      "main": [
        [
          {
            "node": "Is PDF?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Is PDF?": {
      "main": [
        [
          {
            "node": "Extract PDF Text",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Insert Drive Documents",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract PDF Text": {
      "main": [
        [
          {
            "node": "Prepare Drive PDF",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Prepare Drive PDF": {
      "main": [
        [
          {
            "node": "Insert Drive PDFs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Embeddings (PDF)": {
      "ai_embedding": [
        [
          {
            "node": "Insert Drive PDFs",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Document Loader (PDF text)": {
      "ai_document": [
        [
          {
            "node": "Insert Drive PDFs",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Chunk Splitter (PDF)": {
      "ai_textSplitter": [
        [
          {
            "node": "Document Loader (PDF text)",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Embeddings (Drive)": {
      "ai_embedding": [
        [
          {
            "node": "Insert Drive Documents",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Document Loader (Drive, binary)": {
      "ai_document": [
        [
          {
            "node": "Insert Drive Documents",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Chunk Splitter (Drive)": {
      "ai_textSplitter": [
        [
          {
            "node": "Document Loader (Drive, binary)",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    }
  },
  "settings": {
    "executionOrder": "v1"
  },
  "pinData": {}
}