// Coyote/driver/hypervisor/hypervisor_ops.c
/**
* Copyright (c) 2023, Systems Group, ETH Zurich
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "hypervisor_ops.h"
/**
* @brief Sets up the capability section of the PCI config space.
* At the moment this includes the MSI-X capability and the PCI Express
* endpoint capability, which is copied from the actual hardware.
* The latter of course only happens on a PCI platform.
*
* @param cfg pointer to the emulated pci_config_space
* @param pd bus driver data of the parent device
*/
static void set_up_cap_section(struct pci_config_space *cfg, struct bus_drvdata *pd)
{
struct cap_header *header;
uint8_t offset;
int i;
// Init at the first position for the cap list
cfg->cap_pointer = MSIX_OFFSET;
// Let the OS know that this device has caps
cfg->status |= BIT(4);
set_up_msix_header(cfg, MSIX_OFFSET, NUM_INTERRUPTS);
// Get header
header = (struct cap_header *)((uint8_t *)cfg + MSIX_OFFSET);
// If we are on a PCI platform, copy the endpoint header
if (cyt_arch == CYT_ARCH_PCI)
{
// Point to the next cap
header->next_pointer = MSIX_OFFSET + MSIX_SIZE;
// Copy cap struct from pci device
offset = pci_find_capability(pd->pci_dev, PCI_CAP_ID_EXP);
for (i = 0; i < PCI_CAP_PCIX_SIZEOF_V2; i++)
{
pci_read_config_byte(pd->pci_dev,
offset + i, (uint8_t *)cfg + MSIX_OFFSET + MSIX_SIZE + i);
}
header = (struct cap_header *)((uint8_t *)cfg + MSIX_OFFSET + MSIX_SIZE);
}
header->next_pointer = 0;
}
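/*
* For illustration, the capability chain built above can be walked the same
* way a guest would walk it. This is only a sketch, not driver logic; it
* assumes the MSIX_OFFSET/MSIX_SIZE values from the header:
*
*   uint8_t off = cfg->cap_pointer;                      // == MSIX_OFFSET
*   while (off) {
*       struct cap_header *cap =
*           (struct cap_header *)((uint8_t *)cfg + off); // MSI-X, then PCIe cap
*       off = cap->next_pointer;                         // 0 terminates the list
*   }
*/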
/**
* @brief Sets the PCI configuration of the mdev
* on creation of the virtual device. The configuration space
* communicates to the guest OS that we use three 64-bit BARs
* and communicates the size of these BARs. Furthermore
* we emulate the vendor_id 0x0102 and device_id 0x0304.
* TODO: Discuss a change to the illegal address 0xffff
* for the vendor id. QEMU replaces this illegal id
* with a free vendor id. However, this might not be the
* case for other emulation solutions.
*
* @param cfg pointer to pci_config_space
* @param pd bus driver data of the parent device
*/
static void set_pci_config(struct pci_config_space *cfg,
struct bus_drvdata *pd)
{
uint64_t bar0_addr, bar2_addr, bar4_addr;
// Clear memory
memset(cfg, 0, sizeof(struct pci_config_space));
// Vendor and device id
cfg->vendor_id = 0x0102;
cfg->device_id = 0x0304;
// Can control the (virtual) bus
cfg->command = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
// Status register default (DEVSEL timing); the capability bit is set later
cfg->status = 0x0200;
cfg->revison_id = 0x10;
cfg->programming_interface = 0x00;
cfg->subclass = 0x00;
cfg->class_code = 0xff;
// Bar0: 64 bit in memory mode
bar0_addr = COYOTE_HYPERVISOR_BAR0_MASK |
PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_SPACE_MEMORY;
if (COYOTE_HYPERVISOR_BAR0_SIZE)
{
cfg->bar0 = (uint32_t)bar0_addr;
cfg->bar1 = (uint32_t)(bar0_addr >> 32);
}
// Bar2: 64 bit in memory mode
bar2_addr = COYOTE_HYPERVISOR_BAR2_MASK |
PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_SPACE_MEMORY;
if (COYOTE_HYPERVISOR_BAR2_SIZE)
{
cfg->bar2 = (uint32_t)bar2_addr;
cfg->bar3 = (uint32_t)(bar2_addr >> 32);
}
// Bar4: 64 bit in memory mode
bar4_addr = COYOTE_HYPERVISOR_BAR4_MASK |
PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_SPACE_MEMORY;
if (COYOTE_HYPERVISOR_BAR4_SIZE)
{
cfg->bar4 = (uint32_t)bar4_addr;
cfg->bar5 = (uint32_t)(bar4_addr >> 32);
}
// Init cap list
set_up_cap_section(cfg, pd);
// Interrupt pin
cfg->interrupt_pin = 0x01;
}
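/*
* A guest driver would bind to this emulated device by matching the IDs set
* above. A minimal guest-side sketch (hypothetical module), assuming the
* 0x0102/0x0304 pair is kept:
*
*   static const struct pci_device_id guest_ids[] = {
*       { PCI_DEVICE(0x0102, 0x0304) },
*       { 0, }
*   };
*   MODULE_DEVICE_TABLE(pci, guest_ids);
*/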
/**
* @brief Handles the create operation for the mediated device:
* allocates data structures and does basic initialization.
* This function should only be called from the mdev framework!
*
* @param mdev passed from the vfio_mdev framework
* @return int zero on success
*/
static int hypervisor_vfpga_create(struct mdev_device *mdev)
{
struct device *dev;
struct bus_drvdata *d;
struct fpga_dev *fpga;
struct m_fpga_dev *m_vfpga;
int i;
int ret_val;
ret_val = 0;
dbg_info("Create start.\n");
// Load parent data to find out
// the fpga region we belong to
BUG_ON(!mdev);
dev = mdev_parent_dev(mdev);
if (!dev)
{
pr_err("Failed to get mdev parent device\n");
return -EIO;
}
d = dev_get_drvdata(dev);
if (!d)
{
pr_err("Failed to get bus drv data\n");
return -EIO;
}
dbg_info("Got info\n");
// Get id of parent device by parsing the name
if (sscanf(dev->kobj.name, "fpga%d", &i) != 1)
{
pr_err("Failed to parse parent device id from %s\n", dev->kobj.name);
return -EIO;
}
dbg_info("Create for parent device %d\n", i);
// use the id to get the fpga_dev
fpga = &d->fpga_dev[i];
BUG_ON(!fpga);
dbg_info("Got fpga\n");
// allocate management struct for the new mediated device
m_vfpga = kzalloc(sizeof(struct m_fpga_dev), GFP_KERNEL);
if (!m_vfpga)
{
pr_err("failed to allocate m_vfpga memory\n");
return -ENOMEM;
}
m_vfpga->fpga = fpga;
m_vfpga->current_cpid = INVALID_CPID;
// Get vcid. Unique per fpga region and
// used to make the pid unique
spin_lock(&fpga->vcid_lock);
if (!fpga->vcid_alloc)
{
spin_unlock(&fpga->vcid_lock);
pr_err("no free vcid chunk available\n");
kfree(m_vfpga);
return -ENOSPC;
}
m_vfpga->id = fpga->vcid_alloc->id;
fpga->vcid_alloc = fpga->vcid_alloc->next;
fpga->num_free_vcid_chunks -= 1;
spin_unlock(&fpga->vcid_lock);
// Set up pci config
set_pci_config(&m_vfpga->pci_config, d);
// Init locks
spin_lock_init(&m_vfpga->lock);
spin_lock_init(&m_vfpga->current_cpid_lock);
// add into the list of virtual fpgas
spin_lock(&fpga->list_lock);
list_add(&m_vfpga->next, &fpga->mdev_list);
spin_unlock(&fpga->list_lock);
mdev_set_drvdata(mdev, m_vfpga);
dbg_info("successfully created medaited vfpga device\n");
return ret_val;
}
/**
* @brief Called on deletion of the mediated device.
* Frees the vcid and removes the device from the list of
* virtual FPGAs. This function should only be called from the
* mdev framework!
*
* @param mdev passed from the mdev framework
* @return int zero on success
*/
static int hypervisor_vfpga_remove(struct mdev_device *mdev)
{
struct fpga_dev *fpga;
struct m_fpga_dev *m_vfpga;
m_vfpga = mdev_get_drvdata(mdev);
fpga = m_vfpga->fpga;
// release vcid
spin_lock(&fpga->vcid_lock);
fpga->vcid_chunks[m_vfpga->id].next = fpga->vcid_alloc;
fpga->vcid_alloc = &fpga->vcid_chunks[m_vfpga->id];
fpga->num_free_vcid_chunks += 1;
spin_unlock(&fpga->vcid_lock);
// Remove from the list of virtual vFPGAs
spin_lock(&fpga->list_lock);
list_del(&m_vfpga->next);
spin_unlock(&fpga->list_lock);
// Free memory
kfree(m_vfpga);
dbg_info("successfully removed mdeiated vfpga device\n");
return 0;
}
/**
* @brief Handler function for the VFIO KVM notify event.
* Only called by the VFIO framework. For example, if we use QEMU,
* this function is called by QEMU to let our module know which
* KVM instance it is using. This is later needed to do page table walks inside
* this module.
*
* @param nb notifier block from vfio
* @param action which event happened
* @param data payload
* @return int zero on success
*/
int hypervisor_vfio_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
struct m_fpga_dev *vfpga;
vfpga = container_of(nb, struct m_fpga_dev, notifier);
spin_lock(&vfpga->lock);
// only do something if this is a set kvm event
if (action == VFIO_GROUP_NOTIFY_SET_KVM)
{
vfpga->kvm = data;
dbg_info("kvm set successfully");
}
spin_unlock(&vfpga->lock);
return 0;
}
/**
* @brief Handles the opening of the mediated device file (after creation).
* This happens when the VM boots and the file is therefore
* opened by the hypervisor to emulate the PCI device. This function
* does most of the allocation needed for the emulation.
* This includes the MSI-X region setup and the registration
* of the KVM notifier.
*
* @param mdev passed from the mdev framework
* @return int zero on success
*/
static int hypervisor_vfpga_open(struct mdev_device *mdev)
{
struct m_fpga_dev *vfpga;
long unsigned int events;
int ret_val;
int i;
ret_val = 0;
// Get virtual vFPGA struct
vfpga = mdev_get_drvdata(mdev);
spin_lock(&vfpga->lock);
// mediated vfpgas are only intended for use by one VM at a time
if (vfpga->in_use)
{
dbg_info("Failed to open vfio device: busy\n");
spin_unlock(&vfpga->lock);
return -EBUSY;
}
// Set busy
vfpga->in_use = 1;
spin_unlock(&vfpga->lock);
// allocate msix table
// TODO: remove this, seems to be handled by QEMU
vfpga->msix_table = kzalloc(2000, GFP_KERNEL);
if (!vfpga->msix_table)
{
dbg_info("Failed to allocate msix tabel\n");
ret_val = -ENOMEM;
goto err_msix_table;
}
// allocate array for eventfd fds
vfpga->msix_vector = kzalloc(NUM_INTERRUPTS * sizeof(struct msix_interrupt), GFP_KERNEL);
if (!vfpga->msix_vector)
{
dbg_info("Failed to allocate msix interrupts\n");
ret_val = -ENOMEM;
goto err_msix_interrupts;
}
// explicitly invalidate all file descriptor entries
for (i = 0; i < NUM_INTERRUPTS; i++)
{
vfpga->msix_vector[i].eventfd = -1;
}
// Init memory maps
hash_init(vfpga->sbuff_map);
/* We know that this is the only thread accessing this device */
// Register KVM notifier
events = VFIO_GROUP_NOTIFY_SET_KVM;
vfpga->notifier.notifier_call = hypervisor_vfio_notifier;
ret_val = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, &vfpga->notifier);
if (ret_val)
{
pr_err("notifier registration failed\n");
}
dbg_info("successfully opened mediated vfpga\n");
return ret_val;
err_msix_interrupts:
kfree(vfpga->msix_table);
err_msix_table:
// Release the busy flag again so the device can be reopened
spin_lock(&vfpga->lock);
vfpga->in_use = 0;
spin_unlock(&vfpga->lock);
return ret_val;
}
/**
* @brief Called when the device file is closed. This happens when the VM shuts down.
* Removes all state that was associated with the current VM. This includes
* disabling all current interrupts and freeing all MSI-X data.
*
* @param mdev mdev device in question
*/
static void hypervisor_vfpga_close(struct mdev_device *mdev)
{
struct m_fpga_dev *vfpga;
int ret_val;
ret_val = 0;
// Get virtual vFPGA struct
vfpga = mdev_get_drvdata(mdev);
spin_lock(&vfpga->lock);
BUG_ON(!vfpga->in_use);
// unset all interrupts
msix_unset_all_interrupts(vfpga);
// Set free
vfpga->in_use = 0;
spin_unlock(&vfpga->lock);
// free allocated memory
kfree(vfpga->msix_table);
kfree(vfpga->msix_vector);
// Unregister vfio notifier
ret_val = vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &vfpga->notifier);
if (ret_val)
{
dbg_info("Failed to unregister VFIO notifier\n");
}
dbg_info("Closed mediated vfpga file\n");
}
/**
* @brief General function to access data inside the VM;
* allows reading from and writing to the VM that is associated with the vFPGA.
* Only works after the device file was opened!
*
* @param vfpga mediated vfpga device
* @param gpa guest physical address
* @param count bytes to read/write
* @param buf source/target buffer
* @param write non-zero for a write access
* @return ssize_t zero on success
*/
static ssize_t hypervisor_access_kvm(struct m_fpga_dev *vfpga,
gpa_t gpa,
size_t count,
void *buf,
int write)
{
struct kvm *kvm;
int ret_val;
int idx;
kvm = vfpga->kvm;
BUG_ON(!kvm);
ret_val = 0;
dbg_info("Accessing kvm at gpa %llx to %s %lu bytes", gpa, (write ? "write" : "read"), count);
/*
The function uses the page tables of the VM and
they are therefore not allowed to change during the translation.
Hence we have to hold the SRCU read lock.
*/
idx = srcu_read_lock(&kvm->srcu);
if (write)
{
ret_val = kvm_write_guest(kvm, gpa, buf, count);
}
else
{
ret_val = kvm_read_guest(kvm, gpa, buf, count);
}
if (ret_val)
{
pr_info("Failed to %s kvm\n", (write ? "write" : "read"));
}
srcu_read_unlock(&kvm->srcu, idx);
return ret_val;
}
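/*
* Typical usage: the guest passes a guest-physical address of a struct and the
* hypervisor copies it over. A minimal sketch (gpa is whatever the guest wrote
* to the BAR2 register):
*
*   struct hypervisor_map_notifier hdr;
*   if (hypervisor_access_kvm(vfpga, gpa, sizeof(hdr), &hdr, 0))
*       return -EIO;   // read from guest failed
*   // hdr.npages now tells us how large the full notifier is
*/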
/**
* @brief BAR0 accesses are relayed to the actual hardware for
* calls that would be mmapped in a non-VM scenario. BAR0 acts as a passthrough
* for these cases and is mmapped into the VM. Therefore this function is
* normally not used and should not be used, since the trap comes at a very high cost
* and should be avoided whenever possible!
*
* @param vfpga mediated vfpga
* @param buf read/write buffer
* @param count bytes to read/write
* @param pos offset into the bar register
* @param write 1 for write
* @return ssize_t bytes read/written
*/
static ssize_t handle_bar0_access(struct m_fpga_dev *vfpga, char __user *buf,
size_t count, loff_t pos, int write)
{
uint64_t offset;
uint64_t index;
struct fpga_dev *d;
uint64_t tmp[64];
void __iomem *ioaddr;
int ret_val;
ret_val = 0;
BUG_ON(!vfpga);
d = vfpga->fpga;
BUG_ON(!d);
// Bound the access to the size of the on-stack bounce buffer
if (count > sizeof(tmp))
{
return -EINVAL;
}
/*
20 is the shift that separates the subregions for the non-AVX and AVX
regions. Therefore, to differentiate between accesses to these two regions,
the fastest way is to compute the value of the upper bits.
*/
index = pos >> 20;
offset = pos & ((1 << 20) - 1);
if (write)
{
dbg_info("Accessing bar0 at addr %#llx and offset %#llx with count %lu at index %llx\n", pos, offset, count, index);
}
switch (index)
{
case 0x1:
/*
Depending on the offset into the region we want to access
different mappings. This happens here and is one of the reasons
why a trap is not very efficient.
*/
if (offset >= FPGA_CTRL_LTLB_OFFS && offset < FPGA_CTRL_LTLB_SIZE + FPGA_CTRL_LTLB_OFFS)
{
ioaddr = (void __iomem *)vfpga->fpga->fpga_lTlb + (offset - FPGA_CTRL_LTLB_OFFS);
}
else if (offset >= FPGA_CTRL_STLB_OFFS && offset < FPGA_CTRL_STLB_SIZE + FPGA_CTRL_STLB_OFFS)
{
ioaddr = (void __iomem *)vfpga->fpga->fpga_sTlb + (offset - FPGA_CTRL_STLB_OFFS);
}
else if (offset >= FPGA_CTRL_USER_OFFS && offset < FPGA_CTRL_USER_OFFS + FPGA_CTRL_USER_SIZE)
{
ioaddr = (void __iomem *)vfpga->fpga->fpga_user + (offset - FPGA_CTRL_USER_OFFS);
}
else if (offset >= FPGA_CTRL_CNFG_OFFS && offset < FPGA_CTRL_CNFG_OFFS + FPGA_CTRL_CNFG_SIZE)
{
ioaddr = (void __iomem *)vfpga->fpga->fpga_cnfg + (offset - FPGA_CTRL_CNFG_OFFS);
}
else
{
return -EFAULT;
}
break;
case 0x10:
/*
For AVX accesses we do not have this problem. However, during testing we found out
that this part is not working: the corresponding AVX instruction cannot
be trapped, and therefore if the corresponding region is not mmapped we cannot
use AVX.
*/
ioaddr = (void __iomem *)vfpga->fpga->fpga_cnfg_avx + offset;
break;
default:
dbg_info("Access to unspported offset!\n");
return -EFAULT;
}
if (write)
{
dbg_info("%s at ioaddr %p\n", write ? "write" : "read", ioaddr);
dbg_info("tmp addr %p and buf addr %p\n", tmp, buf);
}
#ifndef HYPERVISOR_TEST
if (write)
{
// copy from user
ret_val = copy_from_user(tmp, buf, count);
BUG_ON(ret_val);
memcpy_toio(ioaddr, tmp, count);
// write through to hardware
dbg_info("performed write to ioaddr %p of %lu bytes, first 8 bytes %#016llx", ioaddr, count, tmp[0]);
}
else
{
// copy from io to user
memcpy_fromio(tmp, ioaddr, count);
ret_val = copy_to_user(buf, tmp, count);
BUG_ON(ret_val);
}
#endif
return count;
}
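/*
* To make the index/offset split above concrete: a trapped access at
* pos = 0x100040 decomposes into index = pos >> 20 = 0x1 and
* offset = pos & 0xFFFFF = 0x40, i.e. it falls into the non-AVX control
* region and is relayed to whichever mapping covers offset 0x40 (lTlb,
* sTlb, user or cnfg, depending on the *_OFFS/*_SIZE constants).
*/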
/**
* @brief BAR2 is for communication between the hypervisor
* and the guest driver. These are virtualized versions of the IOCTL
* calls in fpga_fops. This function only handles reads from the register.
* Control functions are trapped here and handled according to the
* desired behaviour, but in a mediated manner. All these
* calls are made by the guest driver to communicate with the hypervisor.
* The commands are:
*
* REGISTER_PID:
* Register a PID. This is used in combination with a write to REGISTER_PID.
* The read returns a new CPID that can be used by the guest.
*
* READ_CNFG:
* Read-through of the corresponding ioctl call. Returns key
* configuration parameters of the FPGA platform.
*
* @param vfpga mediated vfpga
* @param buf read buffer
* @param count bytes to read
* @param pos offset into the register
* @return ssize_t bytes read
*/
static ssize_t handle_bar2_read(struct m_fpga_dev *vfpga, char __user *buf,
size_t count, loff_t pos)
{
loff_t offset;
int ret_val;
uint64_t tmp[MAX_USER_WORDS];
struct fpga_dev *d;
struct bus_drvdata *pd;
BUG_ON(!vfpga);
d = vfpga->fpga;
BUG_ON(!d);
pd = d->pd;
BUG_ON(!pd);
// The lower bits (masked by HYPERVISOR_OFFSET_MASK) encode the command that should be executed
offset = pos & HYPERVISOR_OFFSET_MASK;
switch (offset)
{
case REGISTER_PID_OFFSET:
{
spin_lock(&vfpga->current_cpid_lock);
ret_val = copy_to_user(buf, &vfpga->current_cpid, sizeof(vfpga->current_cpid));
if (ret_val)
{
pr_err("%s.%u: Failed to read\n", __func__, __LINE__);
spin_unlock(&vfpga->current_cpid_lock);
return -EFAULT;
}
// Reset cpid
vfpga->current_cpid = INVALID_CPID;
spin_unlock(&vfpga->current_cpid_lock);
return count;
}
case READ_CNFG_OFFSET:
{
if (!IS_ALIGNED(pos, 8))
{
dbg_info("READ_CNFG not correctly alligned\n");
return -EFAULT;
}
if (count != 8)
{
dbg_info("READ_CNFG not 8 byte read\n");
return -EFAULT;
}
#ifdef HYPERVISOR_TEST
tmp[0] = 0xdeadbeef;
#else
tmp[0] = ((uint64_t)pd->n_fpga_chan << 32) | ((uint64_t)pd->n_fpga_reg << 48) |
((uint64_t)pd->en_avx) | ((uint64_t)pd->en_bypass << 1) | ((uint64_t)pd->en_tlbf << 2) | ((uint64_t)pd->en_wb << 3) |
((uint64_t)pd->en_strm << 4) | ((uint64_t)pd->en_mem << 5) | ((uint64_t)pd->en_pr << 6) |
((uint64_t)pd->en_rdma_0 << 16) | ((uint64_t)pd->en_rdma_1 << 17) | ((uint64_t)pd->en_tcp_0 << 18) | ((uint64_t)pd->en_tcp_1 << 19);
#endif
dbg_info("reading config 0x%llx\n", tmp[0]);
ret_val = copy_to_user(buf, tmp, count);
if (ret_val)
{
dbg_info("Failed to copy to user\n");
return -EFAULT;
}
return count;
}
default:
{
// Not mapped, return no meaningful data
return count;
}
}
}
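/*
* The READ_CNFG word packs the platform configuration into a single 64-bit
* value. A guest-side decode sketch, mirroring the encoding above (bit
* positions as used in this file; not a stable ABI definition):
*
*   uint64_t cnfg;            // value read from READ_CNFG_OFFSET
*   int en_avx  = (cnfg >> 0) & 0x1;
*   int en_pr   = (cnfg >> 6) & 0x1;
*   int n_chan  = (cnfg >> 32) & 0xffff;   // n_fpga_chan
*   int n_reg   = (cnfg >> 48) & 0xffff;   // n_fpga_reg
*/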
/**
* @brief AVX passthrough. This is not implemented for the simple reason
* that trapping AVX instructions does not work at the moment. Therefore, if
* this function were needed it would not work, and the user should
* fall back to a bitstream that does not use AVX.
*
* @param vfpga mediated vfpga
* @param buf read/write buffer
* @param count bytes to read/write
* @param pos offset into the BAR4 region
* @param is_write 1 for write
* @return ssize_t bytes read/written
*/
static ssize_t handle_bar4_access(struct m_fpga_dev *vfpga, char __user *buf,
size_t count, loff_t pos, int is_write)
{
return count;
}
/**
* @brief Called by the VFIO framework for any access to the virtual PCI
* device in the VM. Based on the region accessed, the access is handled
* by a dedicated function. Therefore this function acts as a simple
* demultiplexer.
*
* @param mdev mediated vfpga device
* @param buf read buffer
* @param count bytes to read
* @param ppos position; the region index is encoded in the upper bits as reported to VFIO by the ioctl calls handled below
* @return ssize_t bytes read
*/
static ssize_t hypervisor_vfpga_read(struct mdev_device *mdev, char __user *buf,
size_t count, loff_t *ppos)
{
loff_t pos, offset, index;
int ret_val;
struct m_fpga_dev *vfpga;
pos = *ppos;
offset = pos & HYPERVISOR_OFFSET_MASK;
index = COYOTE_GET_INDEX(pos);
ret_val = 0;
vfpga = mdev_get_drvdata(mdev);
switch (index)
{
case VFIO_PCI_CONFIG_REGION_INDEX:
{
// dbg_info("Reading pci config at offset %llu, reading %lu bytes\n", offset, count);
if (offset + count > COYOTE_HYPERVISOR_CONFIG_SIZE)
{
pr_err("%s.%u: Out of bound read\n", __func__, __LINE__);
return -EFAULT;
}
// Read from in memory config
ret_val = copy_to_user(buf, ((char *)&vfpga->pci_config) + offset, count);
if (ret_val)
{
pr_err("%s.%u: Failed to read\n", __func__, __LINE__);
return -EFAULT;
}
return count;
}
case VFIO_PCI_BAR0_REGION_INDEX:
{
// Check bounds
if (offset + count > COYOTE_HYPERVISOR_BAR0_SIZE)
{
pr_err("%s.%u: Out of bound read\n", __func__, __LINE__);
return -EFAULT;
}
return handle_bar0_access(vfpga, buf, count, pos, 0);
}
case VFIO_PCI_BAR2_REGION_INDEX:
{
// Check bounds
if (offset + count > COYOTE_HYPERVISOR_BAR2_SIZE)
{
pr_err("%s.%u: Out of bound read\n", __func__, __LINE__);
return -EFAULT;
}
return handle_bar2_read(vfpga, buf, count, pos);
}
case VFIO_PCI_BAR4_REGION_INDEX:
{
// Check bounds
if (offset + count > COYOTE_HYPERVISOR_BAR4_SIZE)
{
pr_err("%s.%u: Out of bound read\n", __func__, __LINE__);
return -EFAULT;
}
return handle_bar4_access(vfpga, buf, count, pos, 0);
}
default:
{
// Not a valid region to read
pr_debug("%s.%u: Read to invalid region %lld\n", __func__, __LINE__, index);
return 0;
}
}
}
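/*
* The *ppos value is not a plain file offset: the upper bits carry the VFIO
* region index (as advertised via region_info.offset in the ioctl handler)
* and the lower bits the offset inside that region. A sketch of how userspace
* would read 4 bytes from BAR2, assuming COYOTE_INDEX_TO_ADDR mirrors
* COYOTE_GET_INDEX:
*
*   uint32_t val;
*   off_t pos = region_info.offset + reg_offset;   // index | offset
*   pread(device_fd, &val, sizeof(val), pos);      // lands in handle_bar2_read()
*/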
/**
* @brief Write to a base address register (BAR). Always keeps the lowest 4 bits,
* since they contain non-writable bits.
*
* @param bar pointer to BAR
* @param val value to be written
* @param low write to low (1) or high (0) half of the 64-bit BAR
*/
static void cfg_bar_write(uint32_t *bar, uint32_t val, int low)
{
if (low)
{
*bar = (val & GENMASK(31, 4)) |
(*bar & GENMASK(3, 0));
}
else
{
*bar = val;
}
}
/**
* @brief Handles writes to the PCI config. Not all writes should be permitted,
* so this function emulates what would happen if we wrote to an actual PCI device.
* TODO: Some functionality is still missing here, but the relevant subset is implemented.
*
* @param fpga mediated vfpga that the write is targeted at
* @param buf write buffer
* @param count bytes to write
* @param pos offset into the configuration space
* @return int zero on success
*/
static int handle_pci_cfg_write(struct m_fpga_dev *fpga, char __user *buf, size_t count, loff_t pos)
{
uint32_t new_val;
int ret_val;
// Config space writes are at most 4 bytes wide
if (count > sizeof(new_val))
{
return -EINVAL;
}
new_val = 0;
ret_val = copy_from_user(&new_val, buf, count);
if (ret_val)
{
dbg_info("could not copy new pci cfg value from userspace\n");
return -EFAULT;
}
pos = pos & HYPERVISOR_OFFSET_MASK;
switch (pos)
{
case 0x10: // BAR 0
if (new_val == GENMASK(31, 0))
{
cfg_bar_write(&fpga->pci_config.bar0, (uint32_t)(COYOTE_HYPERVISOR_BAR0_MASK | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_SPACE_MEMORY), 1);
}
else
{
cfg_bar_write(&fpga->pci_config.bar0, new_val, 1);
}
break;
case 0x14: // BAR 1
if (new_val == GENMASK(31, 0))
{
cfg_bar_write(&fpga->pci_config.bar1, (uint32_t)(~(COYOTE_HYPERVISOR_BAR0_SIZE - 1) >> 32), 0);
}
else
{
cfg_bar_write(&fpga->pci_config.bar1, new_val, 0);
}
break;
case 0x18: // BAR 2
if (new_val == GENMASK(31, 0))
{
cfg_bar_write(&fpga->pci_config.bar2, (uint32_t)(COYOTE_HYPERVISOR_BAR2_MASK | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_SPACE_MEMORY), 1);
}
else
{
cfg_bar_write(&fpga->pci_config.bar2, new_val, 1);
}
break;
case 0x1C: // BAR 3
if (new_val == GENMASK(31, 0))
{
cfg_bar_write(&fpga->pci_config.bar3, (uint32_t)(~(COYOTE_HYPERVISOR_BAR2_SIZE - 1) >> 32), 0);
}
else
{
cfg_bar_write(&fpga->pci_config.bar3, new_val, 0);
}
break;
case 0x20: // BAR 4
if (new_val == GENMASK(31, 0))
{
cfg_bar_write(&fpga->pci_config.bar4, (uint32_t)(COYOTE_HYPERVISOR_BAR4_MASK | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_SPACE_MEMORY), 1);
}
else
{
cfg_bar_write(&fpga->pci_config.bar4, new_val, 1);
}
break;
case 0x24: // BAR 5
if (new_val == GENMASK(31, 0))
{
cfg_bar_write(&fpga->pci_config.bar5, (uint32_t)(~(COYOTE_HYPERVISOR_BAR4_SIZE - 1) >> 32), 0);
}
else
{
cfg_bar_write(&fpga->pci_config.bar5, new_val, 0);
}
break;
case 0x3C: // Interrupt line
fpga->pci_config.interrupt_line = (uint8_t)new_val;
break;
default:
if (pos >= MSIX_OFFSET && pos < MSIX_OFFSET + MSIX_SIZE)
{
// Emulate write to msix header
ret_val = write_to_msix_header((void *)(fpga->pci_config.cap_section + MSIX_OFFSET), pos - MSIX_OFFSET, new_val, count);
if (ret_val < 0)
{
return ret_val;
}
}
else
{
// dbg_info("write to fpga config with %lu not handled writes at offset 0x%llx",
// count, pos);
}
}
return 0;
}
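/*
* The all-ones writes handled above implement the standard BAR sizing probe:
* the guest writes 0xFFFFFFFF, reads the value back and derives the size from
* the bits that stayed writable. A sketch of the guest-side arithmetic for a
* 64-bit BAR (lo/hi are the values read back from e.g. BAR0/BAR1):
*
*   uint64_t mask = ((uint64_t)hi << 32) | (lo & ~0xfULL); // drop flag bits
*   uint64_t size = ~mask + 1;                             // e.g. 0x...f0000 -> 64 KiB
*/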
/**
* @brief This function handles most of the communication.
* The offset is interpreted as a command to the hypervisor, and
* the value of the write can be used as an argument, either to pass a simple
* number or, in case of more complicated data, a pointer to data
* inside the VM.
* The following functionalities are handled:
*
* REGISTER_PID:
* A write to this command allocates a new CPID (if any are free)
* and registers the pid passed as argument. The CPID can be retrieved by the
* guest with a read at the same offset.
*
* UNREGISTER_PID:
* Complement to REGISTER_PID. The guest writes the CPID to this offset
* to deregister a guest process.
*
* MAP_USER:
* The guest writes the address of the notifier to this offset. The hypervisor
* copies the notifier from the VM. The notifier contains information about
* the address range that should be mapped on the FPGA.
*
* UNMAP_USER:
* Counterpart to MAP_USER and works in a similar manner. The guest writes
* the address of a notifier to this offset. The hypervisor reads the struct from the
* VM and unmaps the range. The range has to have been mapped previously with a call to
* MAP_USER.
*
* PUT_ALL_USER_PAGES:
* The guest writes zero or non-zero to this offset to indicate whether the pages
* should be dirtied before releasing them.
* The call then puts all pages that were mapped by the mediated device.
* The guest usually calls this when the device is closed in the VM.
*
* TEST_INTERRUPT:
* TODO: delete
*
* @param vfpga mediated vfpga
* @param buf write buffer
* @param count bytes to write
* @param pos offset into BAR2 region
* @return ssize_t bytes written
*/
static ssize_t handle_bar2_write(struct m_fpga_dev *vfpga, const char __user *buf,
size_t count, loff_t pos)
{
loff_t offset;
int ret_val;
uint64_t pid, epid;
uint64_t cpid;
struct bus_drvdata *pd;
uint64_t tmp[MAX_USER_WORDS + 2];
struct hypervisor_map_notifier map_notifier, *full_map_notifier;
uint64_t map_full_size;
ret_val = 0;
offset = pos & HYPERVISOR_OFFSET_MASK;
pd = vfpga->fpga->pd;
// Bound the copy to the size of the on-stack argument buffer
if (count > sizeof(tmp))
{
return -EINVAL;
}
switch (offset)
{
case REGISTER_PID_OFFSET:
{
ret_val = copy_from_user(tmp, buf, count);
if (ret_val)
{
pr_err("%s.%u: Failed to copy data from user\n", __func__, __LINE__);
return -EFAULT;
}
pid = tmp[0];
dbg_info("Registering pid %llu\n", pid);
spin_lock(&pd->stat_lock);
spin_lock(&vfpga->current_cpid_lock);
// Calculate an effective pid to avoid clashes with other vms
// lower 16 bits are the pid inside the vm and the upper 16
// bits are to identify the vm
epid = pid | (vfpga->id << 16);
// register epid
cpid = register_pid(vfpga->fpga, epid);
if (cpid == -1)
{
pr_err("registration failed in hypervisor pid: %lld id: %d\n", pid, vfpga->id);
spin_unlock(&vfpga->current_cpid_lock);
spin_unlock(&pd->stat_lock);
return -EIO;
}
// bookkeeping of which virtual device belongs to which cpid
vfpga->fpga->vdevs[cpid] = vfpga;
vfpga->current_cpid = cpid;
spin_unlock(&vfpga->current_cpid_lock);
spin_unlock(&pd->stat_lock);
dbg_info("Successfully registered pid %llu\n", pid);
return count;
}
case UNREGISTER_PID_OFFSET:
{
// read cpid
ret_val = copy_from_user(&tmp, buf, count);
if (ret_val)
{
pr_err("%s.%u: Failed to copy data from user\n", __func__, __LINE__);
return -EFAULT;
}
cpid = tmp[0];
dbg_info("Unregistering cpid %llu\n", cpid);
// Unregister cpid
spin_lock(&pd->stat_lock);
ret_val = unregister_pid(vfpga->fpga, cpid);
if (ret_val)
{
pr_err("unregestration failed in hypervisor pid: %lld id: %d\n", pid, vfpga->id);
spin_unlock(&pd->stat_lock);
return -EIO;
}
// Remove reverse mapping, cpid is not in use anymore
vfpga->fpga->vdevs[cpid] = NULL;
dbg_info("unregestration succesfull in hypervisor pid: %lld id: %d\n", pid, vfpga->id);
spin_unlock(&pd->stat_lock);
return count;
}
case MAP_USER_OFFSET:
{
// read gpa of the arguments
ret_val = copy_from_user(&tmp, buf, count);
if (ret_val)
{
pr_info("%s.%d: Failed to copy gpa", __func__, __LINE__);
return -EFAULT;
}
// read notifier header from guest
ret_val = hypervisor_access_kvm(vfpga, tmp[0], sizeof(struct hypervisor_map_notifier), &map_notifier, 0);
if (ret_val)
{
pr_info("%s.%d: Failed to read from guest", __func__, __LINE__);
return -EIO;
}
// read complete notifier from guest
map_full_size = sizeof(struct hypervisor_map_notifier) + map_notifier.npages * sizeof(uint64_t);
full_map_notifier = kzalloc(map_full_size, GFP_KERNEL);
if (!full_map_notifier)
{
pr_info("%s.%d: Failed to allocate notifier copy\n", __func__, __LINE__);
return -ENOMEM;
}
ret_val = hypervisor_access_kvm(vfpga, tmp[0], map_full_size, full_map_notifier, 0);
if (ret_val)
{
pr_info("%s.%d: Failed to read from guest", __func__, __LINE__);
kfree(full_map_notifier);
return -EIO;
}
dbg_info("Mapping user pages from hypervisor: gva: %llx, len: %llu, cpid: %llu", full_map_notifier->gva,
full_map_notifier->len, full_map_notifier->cpid);
// Pin pages and install user mappings onto fpga.
ret_val = hypervisor_tlb_get_user_pages(vfpga, full_map_notifier);
if (ret_val)
{
pr_info("%s.%d: Failed to get all user pages", __func__, __LINE__);
}
else
{
ret_val = count;
dbg_info("Successfully mapped user buffer\n");
}
kfree(full_map_notifier);
return ret_val;
}
case UNMAP_USER_OFFSET:
{
// Copy gpa
ret_val = copy_from_user(&tmp, buf, count);
if (ret_val)
{
pr_info("%s.%d: Failed to copy gpa", __func__, __LINE__);
return -EFAULT;
}
// read gva + cpid from kvm
ret_val = hypervisor_access_kvm(vfpga, tmp[0], sizeof(struct hypervisor_map_notifier), &map_notifier,
0);
if (ret_val)
{
pr_info("%s.%d: Failed to read from kvm", __func__, __LINE__);
return -EFAULT;
}
ret_val = hypervisor_tlb_put_user_pages(vfpga, &map_notifier);
if (ret_val)
{
pr_info("%s.%d: Failed to put all user pages", __func__, __LINE__);
}
else
{
ret_val = count;
dbg_info("Successfully unmapped user buffer\n");
}
return ret_val;
}
case PUT_ALL_USER_PAGES:
{
// read dirtied flag
ret_val = copy_from_user(&tmp, buf, count);
if (ret_val)
{
pr_info("could not copy dirtied flag\n");
return -EFAULT;
}
// put all user pages
ret_val = hypervisor_tlb_put_user_pages_all(vfpga, tmp[0]);
if (ret_val)
{
pr_info("could not put all user pages\n");
return -EIO;
}
return count;
}
case TEST_INTERRUPT_OFFSET:
{
// Fire the interrupt outside of the debug macro so that it also
// happens when debug output is compiled out
ret_val = fire_interrupt(&vfpga->msix_vector[0]);
dbg_info("Fired interrupt with ret_val %d!\n", ret_val);
return count;
}
default:
{
// Not used, should not cause any state change
return count;
}
}
return count;
}
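/*
* From the guest driver's point of view the BAR2 protocol above is a simple
* write(-then-read) sequence. A sketch of the register/map flow, assuming
* bar2 is the ioremapped BAR2 base in the guest and notifier_gpa is the
* guest-physical address of a filled-in hypervisor_map_notifier:
*
*   writeq(current->pid, bar2 + REGISTER_PID_OFFSET);   // trap: register_pid()
*   cpid = readq(bar2 + REGISTER_PID_OFFSET);           // trap: returns new CPID
*   writeq(notifier_gpa, bar2 + MAP_USER_OFFSET);       // trap: pin + map pages
*   ...
*   writeq(cpid, bar2 + UNREGISTER_PID_OFFSET);         // trap: unregister_pid()
*/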
/**
* @brief Similar to hypervisor_vfpga_read. Demultiplexes the writes
* to the emulated PCI device.
*
* @param mdev mediated virtual device
* @param buf write buffer
* @param count bytes to write
* @param ppos position, encoding region index and offset
* @return ssize_t bytes written
*/
static ssize_t hypervisor_vfpga_write(struct mdev_device *mdev, const char __user *buf,
size_t count, loff_t *ppos)
{
loff_t pos, offset, index;
int ret_val;
struct m_fpga_dev *vfpga;
pos = *ppos;
offset = pos & HYPERVISOR_OFFSET_MASK;
index = COYOTE_GET_INDEX(pos);
ret_val = 0;
vfpga = mdev_get_drvdata(mdev);
BUG_ON(!vfpga);
switch (index)
{
case VFIO_PCI_CONFIG_REGION_INDEX:
{
if (offset + count > COYOTE_HYPERVISOR_CONFIG_SIZE)
{
pr_err("%s.%u: Out of bound write\n", __func__, __LINE__);
return -EFAULT;
}
ret_val = handle_pci_cfg_write(vfpga, (char __user *)buf, count, pos);
if (ret_val)
{
dbg_info("could not write pci cfg\n");
return -EIO;
}
return count;
}
case VFIO_PCI_BAR0_REGION_INDEX:
{
// Check bounds
if (offset + count > COYOTE_HYPERVISOR_BAR0_SIZE)
{
pr_err("%s.%u: Out of bound read\n", __func__, __LINE__);
return -EFAULT;
}
return handle_bar0_access(vfpga, (char __user *)buf, count, pos, 1);
}
case VFIO_PCI_BAR2_REGION_INDEX:
{
// Check bounds
if (offset + count > COYOTE_HYPERVISOR_BAR2_SIZE)
{
pr_err("%s.%u: Out of bound read\n", __func__, __LINE__);
return -EFAULT;
}
return handle_bar2_write(vfpga, buf, count, pos);
}
case VFIO_PCI_BAR4_REGION_INDEX:
{
// Check bounds
if (offset + count > COYOTE_HYPERVISOR_BAR4_SIZE)
{
pr_err("%s.%u: Out of bound read\n", __func__, __LINE__);
return -EFAULT;
}
return handle_bar4_access(vfpga, (char __user *)buf, count, pos, 1);
}
default:
{
// Not a valid region to write
pr_debug("%s.%d: Write to invalid region %lld\n", __func__, __LINE__, index);
return 0;
}
}
}
/**
* @brief IOCTL calls to the device. The VM cannot perform any ioctl calls,
* but the VFIO framework has a set of IOCTL functions that it uses to gather
* information about the PCI device that is emulated by this driver.
*
* These are:
* - VFIO_DEVICE_GET_INFO
* Passes a pointer to a struct. This is filled by this function with the
* number of regions and interrupts. Furthermore it communicates the
* capabilities of the device.
*
* - VFIO_DEVICE_GET_REGION_INFO
* Passes a pointer to a struct. The user sets the index of the region
* it wants more information about, and this function sets the remaining fields
* with relevant information such as offset and size.
*
* - VFIO_DEVICE_GET_IRQ_INFO
* Similar to GET_REGION_INFO, but for interrupts. The user passes
* a struct and specifies which kind of interrupt information should
* be provided, and this function sets the other fields.
*
* - VFIO_DEVICE_SET_IRQS
* Management of the interrupts. The user calls with a combination of flags that
* set interrupts. For more information consult the VFIO documentation.
*
* - VFIO_DEVICE_RESET
* TODO: Implement
*
* @param mdev mediated vfpga
* @param cmd ioctl command
* @param arg ioctl argument, usually a pointer into user space
* @return long zero on success
*/
static long hypervisor_vfpga_ioctl(struct mdev_device *mdev, unsigned int cmd, unsigned long arg)
{
struct m_fpga_dev *vfpga;
struct vfio_device_info dev_info;
struct vfio_region_info region_info;
struct vfio_irq_info irq_info;
struct vfio_irq_set *irq_set;
int ret_val;
unsigned int bytes;
void __user *argp;
uint64_t index;
BUG_ON(!mdev);
vfpga = mdev_get_drvdata(mdev);
if (!vfpga)
{
pr_err("Failed to get drv data\n");
return -EIO;
}
ret_val = 0;
argp = (void __user *)arg;
switch (cmd)
{
case VFIO_DEVICE_GET_INFO:
{
// Copy the argsz parameter from user space
ret_val = copy_from_user(&bytes, argp, sizeof(bytes));
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
// Never copy more than the size of the struct onto the stack
if (bytes > sizeof(dev_info))
{
bytes = sizeof(dev_info);
}
// Copy the dev_info struct
ret_val = copy_from_user(&dev_info, argp, bytes);
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
// Resetable PCI device
dev_info.flags = VFIO_DEVICE_FLAGS_PCI | VFIO_DEVICE_FLAGS_RESET;
// Propagate default values from the framework
dev_info.num_regions = VFIO_PCI_NUM_REGIONS;
dev_info.num_irqs = VFIO_PCI_NUM_IRQS;
// Copy the updated info struct back to the user
ret_val = copy_to_user(argp, &dev_info, bytes);
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
return 0;
}
case VFIO_DEVICE_GET_REGION_INFO:
{
// Copy the argsz parameter from user space
ret_val = copy_from_user(&bytes, argp, sizeof(bytes));
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
// Never copy more than the size of the struct onto the stack
if (bytes > sizeof(region_info))
{
bytes = sizeof(region_info);
}
// Copy the region_info struct
ret_val = copy_from_user(&region_info, argp, bytes);
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
// For the config region and the
// three 64-bit BARs return the size. Everything else does not
// exist and is therefore 0.
index = region_info.index;
switch (index)
{
case VFIO_PCI_CONFIG_REGION_INDEX:
{
region_info.size = COYOTE_HYPERVISOR_CONFIG_SIZE;
break;
}
case VFIO_PCI_BAR0_REGION_INDEX:
{
region_info.size = COYOTE_HYPERVISOR_BAR0_SIZE;
// direct passthrough for high performance; do not trap these accesses
region_info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
break;
}
case VFIO_PCI_BAR2_REGION_INDEX:
{
region_info.size = COYOTE_HYPERVISOR_BAR2_SIZE;
break;
}
case VFIO_PCI_BAR4_REGION_INDEX:
{
region_info.size = COYOTE_HYPERVISOR_BAR4_SIZE;
// direct passthrough for high performance; do not trap these accesses
region_info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
break;
}
default:
{
region_info.size = 0;
}
}
// Upper 32 bits are used to indicate the index
region_info.offset = COYOTE_INDEX_TO_ADDR(index);
// Allow read and write
region_info.flags |= VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE;
// Copy the updated info struct back to the user
ret_val = copy_to_user(argp, &region_info, bytes);
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
return 0;
}
case VFIO_DEVICE_GET_IRQ_INFO:
{
// Copy the argsz parameter from user space
ret_val = copy_from_user(&bytes, argp, sizeof(bytes));
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
// Never copy more than the size of the struct onto the stack
if (bytes > sizeof(irq_info))
{
bytes = sizeof(irq_info);
}
// Copy the irq_info struct
ret_val = copy_from_user(&irq_info, argp, bytes);
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
// This device supports MSIX interrupts
if (irq_info.index == VFIO_PCI_MSIX_IRQ_INDEX)
{
irq_info.flags = VFIO_IRQ_INFO_NORESIZE | VFIO_IRQ_INFO_EVENTFD;
// Defined by a define in the header
irq_info.count = NUM_INTERRUPTS;
}
// All other types are not supported
else
{
irq_info.flags = 0;
irq_info.count = 0;
}
// Copy the updated info struct back to the user
ret_val = copy_to_user(argp, &irq_info, bytes);
if (ret_val)
{
pr_err("%s.%u: Failed to copy from user space", __func__, __LINE__);
return -EFAULT;
}
return 0;
}
case VFIO_DEVICE_SET_IRQS:
{
// copy argsz field
ret_val = copy_from_user(&bytes, argp, sizeof(bytes));
if (ret_val)
{
dbg_info("Failed to copy from user space\n");
return -EFAULT;
}
// Allocate memory for the additional data
irq_set = kzalloc(bytes, GFP_KERNEL);
if (!irq_set)
{
return -ENOMEM;
}
// copy struct from user
ret_val = copy_from_user(irq_set, argp, bytes);
if (ret_val)
{
dbg_info("Failed to copy from user space\n");
kfree(irq_set);
return -EFAULT;
}
// Only MSIX interrupts are supported and needed by the
// guest driver.
switch (irq_set->index)
{
case VFIO_PCI_MSIX_IRQ_INDEX:
ret_val = handle_set_irq_msix(vfpga, irq_set);
break;
default:
// dbg_info("Tried to set IRQ! Flags: %x, Index: %u, Start: %u, Count: %u\n",
// irq_set->flags, irq_set->index, irq_set->start, irq_set->count);
break;
}
kfree(irq_set);
return ret_val;
}
case VFIO_DEVICE_RESET:
{
return 0;
}
default:
{
return -EINVAL;
}
}
}
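/*
* The SET_IRQS path above is what a VMM uses to wire MSI-X vectors to
* eventfds. A minimal userspace sketch for vector 0 (hypothetical fds;
* see the VFIO documentation for the exact flag semantics):
*
*   struct { struct vfio_irq_set set; int32_t fd; } s = {
*       .set = {
*           .argsz = sizeof(s),
*           .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
*           .index = VFIO_PCI_MSIX_IRQ_INDEX,
*           .start = 0,
*           .count = 1,
*       },
*       .fd = event_fd,
*   };
*   ioctl(device_fd, VFIO_DEVICE_SET_IRQS, &s);   // handled by handle_set_irq_msix()
*/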
/**
* @brief Allows mmap of BAR0 and BAR4 and therefore enables
* direct passthrough of the control registers. The vma struct contains an offset
* into the PCI region, and from this we can determine which BAR is mapped.
* BAR0 and BAR4 are separated from each other. This allows adjusting the sizes of
* these control registers later on without much effort to change the hypervisor.
*
* @param mdev mediated vfpga device
* @param vma virtual memory area to map into
* @return int zero on success
*/
int hypervisor_vfpga_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
int region;
unsigned long vaddr;
unsigned long offset;
struct m_fpga_dev *md;
struct fpga_dev *d;
struct bus_drvdata *pd;
int ret_val;
md = mdev_get_drvdata(mdev);
BUG_ON(!md);
d = md->fpga;
BUG_ON(!d);
pd = d->pd;
BUG_ON(!pd);
// offset into the pci region
vaddr = vma->vm_pgoff << PAGE_SHIFT;
// get the region that should be mapped
region = COYOTE_GET_INDEX(vaddr);
// offset into this region
offset = vaddr & HYPERVISOR_OFFSET_MASK;
ret_val = 0;
dbg_info("MMAP with vaddr %lu, VFIO region index %d, offset %lu\n", vaddr, region, offset);
// Do not cache
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
// Allow passthrough to hardware
if (region == VFIO_PCI_BAR0_REGION_INDEX)
{
// BAR0 is for the address ctrl registers
ret_val = remap_pfn_range(vma, vma->vm_start, d->fpga_phys_addr_ctrl >> PAGE_SHIFT,
FPGA_CTRL_SIZE, vma->vm_page_prot);
if (ret_val)
{
dbg_info("Failed to mmap BAR0.\n");
}
else
{
dbg_info("Mapped addr %llx with size %u\n", d->fpga_phys_addr_ctrl,
FPGA_CTRL_SIZE);
}
}
else if (region == VFIO_PCI_BAR4_REGION_INDEX)
{
// BAR4 is for the avx address ctrl registers
ret_val = remap_pfn_range(vma, vma->vm_start, d->fpga_phys_addr_ctrl_avx >> PAGE_SHIFT,
FPGA_CTRL_CNFG_AVX_SIZE, vma->vm_page_prot);
if (ret_val)
{
dbg_info("Failed to mmap BAR4.\n");
}
else
{
dbg_info("Mapped addr %llx with size %u\n", d->fpga_phys_addr_ctrl_avx,
FPGA_CTRL_CNFG_AVX_SIZE);
}
}
else
{
// Not allowed region
ret_val = -EINVAL;
}
return ret_val;
}
//
//
// START: This section contains static functions and arrays that display information for the mediated devices in sysfs
//
//
static ssize_t
info_show(struct device *dev, struct device_attribute *attr, char *buf)
{
return sprintf(buf, "fpga region\n");
}
static DEVICE_ATTR_RO(info);
static struct attribute *dummy_attrs[] = {
&dev_attr_info.attr,
NULL,
};
static const struct attribute_group fpga_dev_group = {
.name = "vfpga",
.attrs = dummy_attrs,
};
const struct attribute_group *fpga_dev_groups[] = {
&fpga_dev_group,
NULL,
};
static ssize_t
vfpga_info_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct mdev_device *mdev = mdev_from_dev(dev);
struct m_fpga_dev *vfpga = mdev_get_drvdata(mdev);
return sprintf(buf, "virtual vfpga device with id %d\n", vfpga->id);
}
static DEVICE_ATTR_RO(vfpga_info);
static struct attribute *vfpga_attrs[] = {
&dev_attr_vfpga_info.attr,
NULL,
};
static const struct attribute_group vfpga_dev_group = {
.name = "vfpga info",
.attrs = vfpga_attrs,
};
const struct attribute_group *vfpga_dev_groups[] = {
&vfpga_dev_group,
NULL,
};
static ssize_t
name_show(struct device *dev, struct device_attribute *attrs, char *buf)
{
return sprintf(buf, "%s-type\n", dev->kobj.name);
}
static DEVICE_ATTR_RO(name);
static ssize_t
device_api_show(struct device *dev, struct device_attribute *attrs, char *buf)
{
return sprintf(buf, VFIO_DEVICE_API_PCI_STRING);
}
static DEVICE_ATTR_RO(device_api);
static struct attribute *vfpga_type_attrs[] = {
&dev_attr_name.attr,
&dev_attr_device_api.attr,
NULL,
};
//
// END
//
/**
* @brief This function populates an mdev_parent_ops
* struct that is used to create a mediated type
* that is shown in sysfs and provides all the callbacks
* used to manage the devices.
*
* @param vfpga physical FPGA region that acts as parent device
* @return struct mdev_parent_ops* pointer to the populated ops, NULL on failure
*/
struct mdev_parent_ops *hypervisor_get_ops(struct fpga_dev *vfpga)
{
struct mdev_parent_ops *ops;
struct attribute_group *type_group, **type_groups;
char *name;
BUG_ON(!vfpga);
// alloc type group
type_group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
if (!type_group)
{
dbg_info("Failed to allocate type group\n");
goto err;
}
// alloc name
name = kzalloc(64, GFP_KERNEL);
if (!name)
{
dbg_info("could not allocate name\n");
goto err_name;
}
// Create first and only type group
sprintf(name, "fpga_mdev.%d", vfpga->id);
type_group->name = name;
type_group->attrs = vfpga_type_attrs;
// Create type groups array
type_groups = kzalloc(sizeof(struct attribute_group *) * 2, GFP_KERNEL);
if (!type_groups)
{
dbg_info("Could not allocate type group array\n");
goto err_type_groups;
}
type_groups[0] = type_group;
type_groups[1] = NULL;
ops = kzalloc(sizeof(struct mdev_parent_ops), GFP_KERNEL);
if (!ops)
{
dbg_info("could not allocate mdev parent ops\n");
goto err_ops;
}
// Set attributes
ops->owner = THIS_MODULE;
ops->dev_attr_groups = fpga_dev_groups;
ops->mdev_attr_groups = vfpga_dev_groups;
ops->supported_type_groups = type_groups;
// Set handler functions
ops->open_device = hypervisor_vfpga_open;
ops->close_device = hypervisor_vfpga_close;
ops->create = hypervisor_vfpga_create;
ops->remove = hypervisor_vfpga_remove;
ops->write = hypervisor_vfpga_write;
ops->read = hypervisor_vfpga_read;
ops->ioctl = hypervisor_vfpga_ioctl;
ops->mmap = hypervisor_vfpga_mmap;
dbg_info("created vfio-mdev operations\n");
return ops;
err_ops:
kfree(type_groups);
err_type_groups:
kfree(name);
err_name:
kfree(type_group);
err:
return NULL;
}
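/*
* The returned ops struct is what the parent (physical FPGA) device registers
* with the mdev core. A sketch of the expected call site in the bus driver,
* assuming the pre-5.15 mdev_parent_ops API used throughout this file:
*
*   struct mdev_parent_ops *ops = hypervisor_get_ops(fpga);
*   if (!ops || mdev_register_device(fpga_device, ops))
*       pr_err("failed to register mediated device parent\n");
*/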