/* NVClock 0.8 - Linux overclocker for NVIDIA cards
*
* site: http://nvclock.sourceforge.net
*
* Copyright(C) 2001-2006 Roderick Colenbrander
*
* Thanks to Erik Waling for doing Smartdimmer coding/testing. (his code isn't the one in NVClock)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/* This source file uses some clock calculation code from nvidia's xfree86 driver.
To keep Nvidia happy I have added their copyright. The way they interpret it (see linux kernel riva_hw.h)
is that you need to add the disclaimer and copyright and when that's done
you can basicly do what you want.
*/
/***************************************************************************\
|* *|
|* Copyright 1993-2003 NVIDIA, Corporation. All rights reserved. *|
|* *|
|* NOTICE TO USER: The source code is copyrighted under U.S. and *|
|* international laws. Users and possessors of this source code are *|
|* hereby granted a nonexclusive, royalty-free copyright license to *|
|* use this code in individual and commercial software. *|
|* *|
|* Any use of this source code must include, in the user documenta- *|
|* tion and internal comments to the code, notices to the end user *|
|* as follows: *|
|* *|
|* Copyright 1993-2003 NVIDIA, Corporation. All rights reserved. *|
|* *|
|* NVIDIA, CORPORATION MAKES NO REPRESENTATION ABOUT THE SUITABILITY *|
|* OF THIS SOURCE CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" *|
|* WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. NVIDIA, CORPOR- *|
|* ATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOURCE CODE, *|
|* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGE- *|
|* MENT, AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL *|
|* NVIDIA, CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT, INCI- *|
|* DENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RE- *|
|* SULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION *|
|* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF *|
|* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE. *|
|* *|
|* U.S. Government End Users. This source code is a "commercial *|
|* item," as that term is defined at 48 C.F.R. 2.101 (OCT 1995), *|
|* consisting of "commercial computer software" and "commercial *|
|* computer software documentation," as such terms are used in *|
|* 48 C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Govern- *|
|* ment only as a commercial end item. Consistent with 48 C.F.R. *|
|* 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), *|
|* all U.S. Government End Users acquire the source code with only *|
|* those rights set forth herein. *|
|* *|
\***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "nvclock.h"
#include "backend.h"
/*
/ The original NV40 gpu was used as the base for 6800LE/6800NU/6800GT/6800Ultra
/ GPUs. The difference between all these models lie in the amount of enabled
/ pixel/vertex pipelines and clockspeeds. For instance the 6800LE ships with 8
/ pixel pipelines while the 6800GT ships with 16 of those. Right after production
/ all GPUs are tested, if all pipelines work and they run at high clocks they
/ are called Ultra or if pipes are broken they are called 6800NU(12p) or 6800LE(8p).
/ Further in some cases 'good' GPUs can be rebranded too if there's a shortage of
/ 6800NU/6800LE GPUs. The key to this rebranding is register 0x1540 which contains
/ the enabled pixel/vertex pipelines. Depending on the GPU architecture a bit can
/ correspond to a single vertex shader or to a block containing two or four
/ pixel pipelines.
/
/ We now define some words coming from Rivatuner as people are familiar with those.
/ A 'good' GPU for which pipelines are disabled just to get enough lowend models
/ is said to contain 'Software masked units'. In this case the videobios initializes
/ 0x1540 with a value that locks out some units.
/ GPUs which didn't pass the hardware quality testing contain 'Hardware masked units'.
/ In this case the bios initializes 0x1540 with a value that enables all pipelines.
/ A certain (read-only) register (0xc010) contains a mask of units to disable by default.
/ The bios loads this value into another register (0xc020) at startup. The value from
/ 0xc020 is then used by the drivers to disable units in 0x1540. For example, by clearing this
/ register before the drivers are loaded, you can prevent the masked units from being disabled.
/ 1540 units_cfg (rw) second byte contains vertex configuration and first byte pipeline
/ c010 default_mask (r) pixel pipelines start at +22, while vertex start at +16 (is this correct for all cards?)
/ c020 active_mask (rw)
/ c024/c028/c02c are being set to 0, why? (they correspond to c014/c018/c01c)
/
/ Below are supported pipeline configurations on various types of cards. Not sure
/ if everything is fully correct though:
/ - NV40 0x3f0f 6800 cards
/ - NV41 0x1f07 6800 pci-e (is this correct?)
/ - NV43 0x0703 6200/6600 cards
/ - NV44 0x0703 6200(Go)/Turbocache cards
/ - NV46 0x0703 7300 (is this correct?)
/ - NV47/NV49 0xff3f 7800/7900 cards
/ - NV4B 0x1f07 7600 (is this correct?)
*/
/* Return the unit mask of a fully enabled GPU for the current architecture.
/  The low byte describes the pixel units and the second byte the vertex
/  units, matching the layout of register 0x1540.
/  Architectures that are not listed yield 0 (no known units) instead of an
/  indeterminate value.
*/
static int nv40_get_default_mask()
{
	int mask;

	switch(nv_card->arch)
	{
		case NV40:
			mask = 0x3f0f;
			break;
		case NV41:
		case NV4B:
			mask = 0x1f07;
			break;
		case NV43:
		case NV44:
		case NV46:
			mask = 0x703;
			break;
		case NV47:
		case NV49:
			mask = 0xff3f;
			break;
		default:
			/* Unknown architecture: never hand back an uninitialized value */
			mask = 0;
			break;
	}

	return mask;
}
/* Render a unit mask of a nv4x card as a string of '0'/'1' characters.
/  The string length equals the number of bits set in hw_default; the
/  characters are the lowest 'length' bits of mask, most significant first.
/  buf must have room for that many characters plus the terminating 0.
*/
static void nv40_unit_mask_to_binary(unsigned char mask, unsigned char hw_default, char *buf)
{
	unsigned char remaining;
	char *out = buf;
	int units = 0;
	int bit;

	/* The number of units on the card is the population count of hw_default */
	for(remaining = hw_default; remaining != 0; remaining >>= 1)
		units += remaining & 1;

	/* Emit the bits from the highest unit down to unit 0 */
	for(bit = units - 1; bit >= 0; bit--)
		*out++ = ((mask >> bit) & 1) ? '1' : '0';

	*out = 0;
}
/* Look for hardware masked units.
/  The mask is read from register 0xc010; when that register reads 0 the
/  value of 0xc020 is used instead (seen on 6200 cards, and the only option
/  when the bios can't be dumped). Note that 0xc020 is cleared during
/  unlocking, so this fallback loses its information afterwards.
/
/  Returns 1 when masked units were found and 0 otherwise. On success pmask
/  (pixel) and vmask (vertex) receive either a single byte each (byte != 0)
/  or a binary string each (byte == 0).
*/
static int nv40_get_hw_masked_units(char *pmask, char *vmask, int byte)
{
	unsigned int raw = nv_card->PMC[0xc010/4];
	unsigned int masked_units;
	unsigned char pixel_units, vertex_units;
	int full_mask;

	if(raw == 0)
		raw = nv_card->PMC[0xc020/4];

	full_mask = nv40_get_default_mask();

	/* Vertex bits start at bit 16 and pixel bits at bit 22 of the register;
	/  move them to the second and first byte and drop units the architecture doesn't have.
	/  What to do with NV47 which has 8 vertex units?
	*/
	masked_units = (((raw & 0x3f0000) >> 8) | (raw >> 22)) & full_mask;
	if(masked_units == 0)
		return 0;

	pixel_units = masked_units & 0xff;
	vertex_units = (masked_units >> 8) & 0xff;

	if(byte)
	{
		*pmask = pixel_units;
		*vmask = vertex_units;
	}
	else
	{
		nv40_unit_mask_to_binary(pixel_units, full_mask & 0xff, pmask);
		nv40_unit_mask_to_binary(vertex_units, (full_mask >> 8) & 0xff, vmask);
	}

	return 1;
}
/* Look for software masked units.
/  The reference pipeline configuration comes from the bios dump when one is
/  available; otherwise the current contents of register 0x1540 (limited to
/  the units of this architecture) are used. The latter isn't fully correct
/  but on some cards (for instance 6200go) the bios can't be dumped.
/
/  Returns 1 when the configuration differs from the optimal mask of the
/  architecture and 0 otherwise. On success pmask (pixel) and vmask (vertex)
/  receive either a single byte each (byte != 0) or a binary string each
/  (byte == 0).
*/
static int nv40_get_sw_masked_units(char *pmask, char *vmask, int byte)
{
	unsigned int mask = nv40_get_default_mask();
	unsigned int pipe_cfg;
	unsigned int units;
	unsigned char pixel_units, vertex_units;

	if(nv_card->bios)
		pipe_cfg = nv_card->bios->pipe_cfg;
	else
		pipe_cfg = nv_card->PMC[0x1540/4] & mask; /* the original statement dropped this assignment */

	if(!pipe_cfg)
		return 0;

	/* Only the lower 16 bits carry unit information; when they equal the
	/  optimal mask for this type of card nothing is sw masked.
	*/
	if((pipe_cfg & 0xffff) == mask)
		return 0;

	/* NOTE(review): this selects the bits of pipe_cfg that lie outside the
	/  default mask. If the intent is 'units of the default mask which are not
	/  enabled in pipe_cfg' the expression would be (mask & ~pipe_cfg) - confirm
	/  on hardware before changing it.
	*/
	units = pipe_cfg & ~mask;
	pixel_units = units & 0xff;
	/* The vertex bits live in the second byte; shift them down so they fit in a byte */
	vertex_units = (units >> 8) & 0xff;

	if(byte)
	{
		*pmask = pixel_units;
		*vmask = vertex_units;
	}
	else
	{
		nv40_unit_mask_to_binary(pixel_units, mask & 0xff, pmask);
		nv40_unit_mask_to_binary(vertex_units, (mask >> 8) & 0xff, vmask);
	}

	return 1;
}
/* Count the enabled pixel pipeline bits in the first byte of register 0x1540.
/  mask receives the active pipelines, either as a single byte (byte != 0)
/  or as a binary string (byte == 0). total receives the number of pixel
/  units represented by one bit: 2 on NV44/NV46 and 4 on everything else.
/  The return value is the number of bits that are set.
*/
static int nv40_get_pixel_pipelines(char *mask, int byte, int *total)
{
	unsigned char pipe_cfg = nv_card->PMC[0x1540/4] & 0xff;
	unsigned char bits;
	int enabled = 0;

	/* Every set bit is one enabled block of pixel pipelines */
	for(bits = pipe_cfg; bits != 0; bits >>= 1)
		enabled += bits & 0x1;

	if(byte)
		*mask = pipe_cfg;
	else
		nv40_unit_mask_to_binary(pipe_cfg, nv40_get_default_mask() & 0xff, mask);

	/* NV44/NV46 use 2 pixel units per pipeline */
	*total = (nv_card->arch & (NV44 | NV46)) ? 2 : 4;

	return enabled;
}
/* Count the enabled vertex pipelines, which are stored in the second byte
/  of register 0x1540 (one bit per vertex pipeline).
/  mask receives the active pipelines, either as a single byte (byte != 0)
/  or as a binary string (byte == 0). The return value is the number of
/  bits that are set.
*/
static int nv40_get_vertex_pipelines(char *mask, int byte)
{
	unsigned char pipe_cfg = (nv_card->PMC[0x1540/4] >> 8) & 0xff;
	unsigned char bits;
	int enabled = 0;

	for(bits = pipe_cfg; bits != 0; bits >>= 1)
		enabled += bits & 0x1;

	if(byte)
		*mask = pipe_cfg;
	else
		nv40_unit_mask_to_binary(pipe_cfg, (nv40_get_default_mask() >> 8) & 0xff, mask);

	return enabled;
}
/* Program the pixel pipeline byte (first byte) of register 0x1540 with mask
/  while leaving the other bits of the register as they were.
/  Registers 0xc020/0xc024/0xc028/0xc02c are cleared first.
*/
static void nv40_set_pixel_pipelines(unsigned char mask)
{
	int pipe_cfg = nv_card->PMC[0x1540/4];

	/* Why do 0xc024/0xc028/0xc02c need to be reset? What do they contain? */
	nv_card->PMC[0xc020/4] = nv_card->PMC[0xc024/4] = nv_card->PMC[0xc028/4] = nv_card->PMC[0xc02c/4] = 0;

	/* Keep everything except the first byte and put the new mask in */
	nv_card->PMC[0x1540/4] = (pipe_cfg & ~0xff) | mask;
}
/* Program the vertex pipeline byte (second byte) of register 0x1540 with mask
/  while leaving the other bits of the register as they were.
/  Registers 0xc020/0xc024/0xc028/0xc02c are cleared first.
*/
static void nv40_set_vertex_pipelines(unsigned char mask)
{
	int pipe_cfg = nv_card->PMC[0x1540/4];

	/* Why do 0xc024/0xc028/0xc02c need to be reset? What do they contain? */
	nv_card->PMC[0xc020/4] = nv_card->PMC[0xc024/4] = nv_card->PMC[0xc028/4] = nv_card->PMC[0xc02c/4] = 0;

	/* Keep everything except the second byte and put the new mask in */
	nv_card->PMC[0x1540/4] = (pipe_cfg & ~0xff00) | (mask << 8);
}
/* Fanspeed code for Geforce6800 hardware.
/  Register 0x10f0 drives the pwm signal generator of the fan: bits 30-16
/  hold the duty cycle value and bits 14-0 the pwm division ratio, which
/  also decides the smallest adjustment step. The duty cycle is stored
/  inverted (a stored 10% means 90%), so the result is
/  (divider - duty) / divider expressed as a percentage.
*/
static float nv40_get_fanspeed()
{
	int divider = nv_card->PMC[0x10f0/4] & 0x7fff;

	return (float)(divider - ((nv_card->PMC[0x10f0/4] >> 16) & 0x7fff)) * 100.0/(float)divider;
}
/* Set the fanspeed (in percent) on Geforce6800 hardware through register 0x10f0.
/  Requests below 10% or above 100% are ignored so that the fan can't be
/  switched off. The inverted duty cycle goes into bits 30-16, the current
/  pwm division ratio is written back into the low bits and bit 31 is set.
*/
static void nv40_set_fanspeed(float speed)
{
	int divider = nv_card->PMC[0x10f0/4] & 0x7fff;
	int duty;
	int value;

	if(speed < 10 || speed > 100)
		return;

	/* The register wants the 'off' part of the cycle, hence 100 - speed */
	duty = ((int)(100 - speed) * divider/100) & 0x7fff;
	value = 0x80000000 + (duty << 16) + divider;

	nv_card->PMC[0x10f0/4] = value;
}
/* Fanspeed code for Geforce6600 hardware (does this work for 6200 cards too??)
/  On Geforce 6200/6600(GT)/7600/7800GS hardware the low bits of register
/  0x15f4 hold the duty cycle value of the pwm signal generator and the low
/  bits of 0x15f8 hold the pwm division ratio. The duty cycle is stored
/  inverted (a value equal to the divider means off, 0 means fully on), so
/  the result is (divider - duty) / divider expressed as a percentage.
*/
static float nv43_get_fanspeed()
{
	int divider = nv_card->PMC[0x15f8/4] & 0x3fff;

	return (divider - (nv_card->PMC[0x15f4/4] & 0x3fff)) * 100.0/(float)divider;
}
/* Set the fanspeed (in percent) on Geforce6600 style hardware through register 0x15f4.
/  Requests below 10% or above 100% are ignored so that the fan can't be
/  switched off. The inverted duty cycle is scaled with the division ratio
/  read from 0x15f8 and written together with bit 31.
*/
static void nv43_set_fanspeed(float speed)
{
	int divider = nv_card->PMC[0x15f8/4] & 0x3fff;
	int value;

	if(speed < 10 || speed > 100)
		return;

	/* The register wants the 'off' part of the cycle, hence 100 - speed */
	value = 0x80000000 + (int)((100 - speed) * divider/100);

	nv_card->PMC[0x15f4/4] = value;
}
/* Read the internal temperature sensor found on NV43 hardware and higher.
/  The sensor argument is bogus; it only exists so that this function has
/  the same shape as the I2C based nv_card->get_gpu_temp implementations.
/
/  The raw sensor value is calibrated as raw * slope + offset (+ correction).
/  The calibration constants come from the video bios when it could be read,
/  else built-in per-architecture values are used (needed for most mobile
/  GPUs). Note that the built-in table has no default entry: for an
/  architecture that isn't listed offset and slope are left unset.
*/
static int nv43_get_gpu_temp(void *sensor)
{
	int correction = 0;
	float offset;
	float slope;
	int raw;

	(void)sensor;

	if(nv_card->bios)
	{
		/* Calibration values stored in the video bios; they differ for each type of gpu */
		offset = (float)nv_card->bios->sensor_cfg.diode_offset_mult / (float)nv_card->bios->sensor_cfg.diode_offset_div;
		slope = (float)nv_card->bios->sensor_cfg.slope_mult / (float)nv_card->bios->sensor_cfg.slope_div;
		correction = nv_card->bios->sensor_cfg.temp_correction;
	}
	else
	{
		/* For now the temperature offset table is duplicated here for cards without a readable bios */
		switch(nv_card->arch)
		{
			case NV43:
				offset = 32060.0/1000.0;
				slope = 792.0/1000.0;
				break;
			case NV44:
			case NV47:
				offset = 27839.0/1000.0;
				slope = 700.0/1000.0;
				break;
			case NV46: /* are these really the default ones? they come from a 7300GS bios */
				offset = -24775.0/100.0;
				slope = 467.0/10000.0;
				break;
			case NV49: /* are these really the default ones? they come from 7900GT/GTX bioses */
				offset = -25051.0/100.0;
				slope = 458.0/10000.0;
				break;
			case NV4B: /* are these really the default ones? they come from a 7600GT bios */
				offset = -24088.0/100.0;
				slope = 442.0/10000.0;
				break;
		}
	}

	/* Assume that the sensor is disabled when the temperature part (without offset) is 0 */
	if((nv_card->PMC[0x15b4/4] & 0xfff) == 0)
	{
		/* Initialize the sensor; for now program a threshold value of 120C
		/  (converted back to a raw sensor value) and give it a moment to settle.
		*/
		nv_card->PMC[0x15b0/4] = 0x10000000 | ((int)(((float)120 - offset - correction) / slope));
		nv_card->PMC[0x15b8/4] = 0x14800000;
		usleep(500);
	}

	/* Geforce 7300/7600/7900 cards use more than one byte for the temperature */
	raw = nv_card->PMC[0x15b4/4] & ((nv_card->arch & (NV46 | NV49 | NV4B)) ? 0x1fff : 0xff);

	return (int)(raw * slope + offset) + correction;
}
/* Get the current backpanel brightness level on laptops.
/  The Smartdimmer value is the 5 bit field at bits 20-16 of register
/  0x15f0; it is mapped back to a level with the inverse of the formula
/  used when setting it: level = 5 * (value - 1).
*/
static int nv44_mobile_get_smartdimmer()
{
	return (((nv_card->PMC[0x15f0/4] >> 16) & 0x1f) - 1) * 5;
}
/* Adjust the backpanel brightness on laptops.
/  Levels outside of 15..100 are ignored. The level is converted to a
/  Smartdimmer value between 4 and 21 (on Windows such values work fine
/  although 0-31 should work) and stored in bits 20-16 of register 0x15f0.
*/
static void nv44_mobile_set_smartdimmer(int level)
{
	int dimmer;

	if(level >= 15 && level <= 100)
	{
		dimmer = level/5 + 1;

		/* Replace the smartdimmer field but keep the rest of the register the same */
		nv_card->PMC[0x15f0/4] = (nv_card->PMC[0x15f0/4] & 0xffe0ffff) | (dimmer << 16);
	}
}
/* Calculate a clock from PLL coefficients:
/      ((n1*n2) / (m1*m2) * base_freq) >> p
/  The result has the unit of base_freq. When one of the dividers is 0 (for
/  instance because an unprogrammed register was read) 0 is returned; the
/  division would otherwise produce a value that can't be converted to int.
*/
static int CalcSpeed_nv40(int base_freq, int m1, int m2, int n1, int n2, int p)
{
	int divider = m1*m2;

	if(divider == 0)
		return 0;

	return (int)((float)(n1*n2)/divider * base_freq) >> p;
}
/* Decode a pair of PLL registers into a clock (the result of CalcSpeed_nv40 divided by 1000).
/  mpll lives at 0x4020 and 0x4024; nvpll at 0x4000 and 0x4004.
/  pll holds the post divider p (bits 17-16) and the mode bits, pll2 holds
/  m1 (bits 7-0), n1 (bits 15-8), m2 (bits 23-16) and n2 (bits 31-24).
*/
float GetClock_nv40(int base_freq, unsigned int pll, unsigned int pll2)
{
	int p = (pll >> 16) & 0x03;
	int m1 = pll2 & 0xFF;
	int n1 = (pll2 >> 8) & 0xFF;
	/* Unless the second stage turns out to be in use it behaves as 1/1 */
	int m2 = 1;
	int n2 = 1;

	/* Bit 8 of the first pll register can be used to disable the second set of dividers/multipliers. */
	if(!(pll & 0x100))
	{
		/* NV49/NV4B cards seem to use a different calculation when 0x1000 (bit 12) is set;
		/  it isn't clear yet how this works, so for now the architecture is checked as well.
		/  In this mode the post divider isn't used either.
		*/
		if((nv_card->arch & (NV49 | NV4B)) && (pll & 0x1000))
		{
			p = 0;
		}
		else
		{
			m2 = (pll2 >> 16) & 0xFF;
			n2 = (pll2 >> 24) & 0xFF;
		}
	}

	if(nv_card->debug)
		printf("m1=%d m2=%d n1=%d n2=%d p=%d\n", m1, m2, n1, n2, p);

	return (float)CalcSpeed_nv40(base_freq, m1, m2, n1, n2, p)/1000;
}
/* Search PLL coefficients for clockIn using both divider/multiplier stages
/  and a 27000 reference.
/
/  Only requests between 75000 and 1200000 are handled; for anything else
/  pllOut and pllBOut are left untouched. Within that window the search walks
/  m (1-4), m2 (1-4, not larger than m) and n (1-31), derives n2 by rounding
/  and only considers n2 < 24 with n2 <= n. The first candidate that is exact
/  or within 0.1% of the request is taken at once; otherwise the closest
/  candidate that was seen is what remains in the outputs.
/
/  pllOut receives pllIn with bits 17-16 replaced by the post divider,
/  pllBOut receives m | n<<8 | m2<<16 | n2<<24.
/
/  NOTE(review): the post divider thresholds (125/250/500) are far below the
/  accepted window, so on the path that performs the search p is always 0;
/  confirm whether the thresholds were meant to be in the same unit as clockIn.
*/
static void ClockSelectFractional_nv40(int clockIn, unsigned int pllIn, unsigned int *pllOut, unsigned int *pllBOut)
{
	const int base_freq = 27000;
	const unsigned target = (unsigned)clockIn;
	unsigned best_err = 0xFFFFFFFF;
	unsigned m, m2, n, n2, p;
	unsigned got, err;
	int accept;

	p = (clockIn < 125) ? 3 : (clockIn < 250) ? 2 : (clockIn < 500) ? 1 : 0;

	if((target < 75000) || (target > 1200000))
		return;

	for(m = 1; m <= 4; m++)
	{
		for(m2 = 1; m2 <= 4; m2++)
		{
			/* The second divider may not exceed the first one */
			if(m < m2)
				continue;

			for(n = 1; n <= 31; n++)
			{
				n2 = (int)((float)((target << p) * m * m2) / (float)(base_freq * n)+.5);
				if((n2 >= 24) || (n < n2))
					continue;

				got = ((base_freq * n * n2) / (m * m2)) >> p;
				err = (got > target) ? got - target : target - got;

				/* An exact hit or an error of at most 0.1% ends the search */
				accept = (err == 0) || ((float)err/(float)clockIn <= 0.001);
				if(!accept && !(err < best_err))
					continue;

				/* What do the 0x1c and 0xe mean? further there is some bit in pllOut that is sometimes 1 */
				*pllOut = (pllIn & 0xfffcffff) + (p << 16);
				*pllBOut = m + (n<<8) + (m2<<16) + (n2 << 24);

				if(accept)
					return;
				best_err = err;
			}
		}
	}
}
/* Search PLL coefficients for clockIn using a single divider/multiplier
/  stage and a 27000 reference.
/
/  When 0x1000 is set in pllIn the NV49/NV4B memory variant is used (m 1-13,
/  n 1-95, no post divider); else m 2-13 and n 10-75 are searched with a post
/  divider of 1 for requests below 125*1000. The first candidate that is
/  exact or within .5% of the request is taken at once; otherwise the closest
/  candidate that was seen is used.
/
/  When clockIn is not positive, or when no candidate gets closer to the
/  request than the request itself (the clock is too low for the searched
/  range), pllOut and pllBOut are left untouched so that callers which start
/  with zeroed outputs can detect that no speed was found.
/
/  NOTE(review): in the 0x1000 variant only pllBOut is written. The setters
/  in this file only program the hardware when their PLL value is non-zero,
/  so with zero-initialized outputs that variant never results in a register
/  write - confirm whether pllOut should carry pllIn there.
*/
static void ClockSelectInteger_nv40(int clockIn, unsigned int pllIn, unsigned int *pllOut, unsigned int *pllBOut)
{
	int m, n, p;
	int bestm = 0, bestn = 0, bestp = 0;
	int diff, diffOld, mlow, mhigh, nlow, nhigh;
	int found = 0;
	int done = 0;

	/* Without a positive request the relative error below is meaningless */
	if(clockIn <= 0)
		return;

	diffOld = clockIn;

	/* NV49/NV4B integer pll algorithm for memory */
	if(pllIn & 0x1000)
	{
		mlow = 1;
		mhigh = 13;
		nlow = 1;
		nhigh = 95;
		p = 0;
	}
	else
	{
		/* The values below are based on observations on a Geforce6600GT at clocks between 75MHz and 250MHz */
		mlow = 2;
		mhigh = 13;
		nlow = 10;
		nhigh = 75;

		if(clockIn < 125*1000)
			p = 1;
		else
			p = 0;
	}

	/*
	Calculate the m and n values. There are a lot of values which give the same speed;
	We choose the speed for which the difference with the request speed is as small as possible.
	*/
	for(m = mlow; m <= mhigh && !done; m++)
	{
		for(n = nlow; n <= nhigh && !done; n++)
		{
			diff = abs((int)(clockIn - CalcSpeed_nv40(27000, m, 1, n, 1, p)));

			/* When the difference is 0 or less than .5% accept the speed */
			if((diff == 0) || ((float)diff/(float)clockIn <= 0.005))
			{
				bestm = m;
				bestn = n;
				bestp = p;
				found = 1;
				done = 1;
			}

			/* When the new difference is smaller than the old one, use this one */
			if(diff < diffOld)
			{
				diffOld = diff;
				bestm = m;
				bestn = n;
				bestp = p;
				found = 1;
			}
		}
	}

	/* Never hand out coefficients that were not produced by the search */
	if(!found)
		return;

	/* NV49/NV4B integer pll algorithm for memory */
	if(pllIn & 0x1000)
	{
		/* pllOut isn't used. I'm not sure what's in it but I haven't seen its contents changing for different clocks */
		*pllBOut = (bestn << 8) | bestm;
	}
	else
	{
		/* Bit8 selects the integer mode, further bit31 seems to be set in all cases. Some other bits like bit30 are set in some cases too but I don't know when. This looks more or less correct */
		*pllOut = 0x80000000 + (bestp << 16) + 0x11c;

		/* M2 and N2 need to be set to 1 */
		*pllBOut = (1<<24) + (31 << 16) + (bestn << 8) + bestm;
	}
}
/* Pick the PLL search that matches the mode the card is currently in:
/  when 0x100 or 0x1000 is set in pllIn the integer search is used, else
/  the fractional one. For now the gpu stays in integer mode when that is
/  what is currently enabled on the card.
*/
static void ClockSelect_nv40(int clockIn, unsigned int pllIn, unsigned int *pllOut, unsigned int *pllBOut)
{
	if(!(pllIn & (0x1000 | 0x100)))
	{
		ClockSelectFractional_nv40(clockIn, pllIn, pllOut, pllBOut);
		return;
	}

	ClockSelectInteger_nv40(clockIn, pllIn, pllOut, pllBOut);
}
/* Read the gpu clock from the NVPLL registers (0x4000 and 0x4004).
/  The raw register values are printed when debugging is set to 1.
*/
static float nv40_get_gpu_speed()
{
	int coeff = nv_card->PMC[0x4000/4];
	int coeff2 = nv_card->PMC[0x4004/4];

	if(nv_card->debug == 1)
	{
		printf("NVPLL_COEFF=%08x\n", coeff);
		printf("NVPLL2_COEFF=%08x\n", coeff2);
	}

	return GetClock_nv40(nv_card->base_freq, coeff, coeff2);
}
/* Program the gpu clock. nvclk is multiplied by 1000 before it is handed
/  to the PLL search, which starts from the current contents of 0x4000.
/  When the search leaves the first PLL value at 0 (the algorithm doesn't
/  allow too low speeds) the hardware isn't touched.
*/
static void nv40_set_gpu_speed(unsigned int nvclk)
{
	unsigned int coeff = 0;
	unsigned int coeff2 = 0;

	nvclk *= 1000;
	ClockSelect_nv40(nvclk, nv_card->PMC[0x4000/4], &coeff, &coeff2);

	/* When no speed is found, don't change the PLL */
	if(!coeff)
		return;

	if(nv_card->debug)
	{
		printf("NVPLL_COEFF: %08x\n", coeff);
		printf("NVPLL2_COEFF: %08x\n", coeff2);
	}

	nv_card->PMC[0x4000/4] = coeff;
	nv_card->PMC[0x4004/4] = coeff2;
}
/* Read the memory clock from the MPLL registers (0x4020 and 0x4024).
/  The raw register values are printed when debugging is set to 1.
*/
static float nv40_get_memory_speed()
{
	int coeff = nv_card->PMC[0x4020/4];
	int coeff2 = nv_card->PMC[0x4024/4];

	if(nv_card->debug == 1)
	{
		printf("MPLL_COEFF=%08x\n", coeff);
		printf("MPLL2_COEFF=%08x\n", coeff2);
	}

	return GetClock_nv40(nv_card->base_freq, coeff, coeff2);
}
/* Program the memory clock. memclk is multiplied by 1000 before it is
/  handed to the PLL search, which starts from the current contents of 0x4020.
/  When the search leaves the first PLL value at 0 (the algorithm doesn't
/  allow too low speeds) the hardware isn't touched.
/
/  It seems that different NV4X GPUs contain multiple memory clocks.
/  A 6800 card has 4 of them, a 6600GT 2 of them and a NV44 (6200) 1.
/  Very likely this is related to the width of the memory bus, which
/  is 256bit on the 6800, 128bit on the 6600GT (NV43) and 64bit on the NV44.
/  Architectures that aren't listed below (for instance NV46) get no
/  register written at all.
*/
static void nv40_set_memory_speed(unsigned int memclk)
{
	unsigned int coeff = 0;
	unsigned int coeff2 = 0;
	int clocks;

	memclk *= 1000;
	ClockSelect_nv40(memclk, nv_card->PMC[0x4020/4], &coeff, &coeff2);

	/* When no speed is found, don't change the PLL */
	if(!coeff)
		return;

	if(nv_card->debug)
	{
		printf("MPLL_COEFF: %08x\n", coeff);
		printf("MPLL2_COEFF: %08x\n", coeff2);
	}

	/* Number of memory clocks to program for this architecture */
	switch(nv_card->arch)
	{
		case NV40:
		case NV41:
		case NV47:
			clocks = 4;
			break;
		case NV43:
		case NV49:
		case NV4B:
			clocks = 2;
			break;
		case NV44:
			clocks = 1;
			break;
		default:
			clocks = 0;
			break;
	}

	/* The extra clocks are written first and the main MPLL pair last */
	if(clocks >= 4)
	{
		nv_card->PMC[0x402c/4] = coeff;
		nv_card->PMC[0x4030/4] = coeff2;
		nv_card->PMC[0x4044/4] = coeff;
		nv_card->PMC[0x4048/4] = coeff2;
	}
	if(clocks >= 2)
	{
		nv_card->PMC[0x4038/4] = coeff;
		nv_card->PMC[0x403c/4] = coeff2;
	}
	if(clocks >= 1)
	{
		nv_card->PMC[0x4020/4] = coeff;
		nv_card->PMC[0x4024/4] = coeff2;
	}
}
/* Restore the gpu clock by writing the nvpll/nvpll2 values kept in nv_card
/  back to the NVPLL registers (0x4000 and 0x4004).
/  Presumably these are the values that were saved when the card was
/  initialized; that code isn't part of this file.
*/
static void nv40_reset_gpu_speed()
{
/* Set the gpu speed */
nv_card->PMC[0x4000/4] = nv_card->nvpll;
nv_card->PMC[0x4004/4] = nv_card->nvpll2;
}
/* Restore the memory clock by writing the mpll/mpll2 values kept in nv_card
/  back to the memory PLL registers.
/  0x4024 is always written first; after that the same set of registers as
/  used when setting the memory speed is programmed, depending on the number
/  of memory clocks of the architecture (4 on NV40/NV41/NV47, 2 on
/  NV43/NV49/NV4B, 1 on NV44). For other architectures only the initial
/  write to 0x4024 happens.
*/
static void nv40_reset_memory_speed()
{
	int clocks;

	/* Set the memory speed */
	nv_card->PMC[0x4024/4] = nv_card->mpll2;

	switch(nv_card->arch)
	{
		case NV40:
		case NV41:
		case NV47:
			clocks = 4;
			break;
		case NV43:
		case NV49:
		case NV4B:
			clocks = 2;
			break;
		case NV44:
			clocks = 1;
			break;
		default:
			clocks = 0;
			break;
	}

	/* The extra clocks are written first and the main MPLL pair last */
	if(clocks >= 4)
	{
		nv_card->PMC[0x402c/4] = nv_card->mpll;
		nv_card->PMC[0x4030/4] = nv_card->mpll2;
		nv_card->PMC[0x4044/4] = nv_card->mpll;
		nv_card->PMC[0x4048/4] = nv_card->mpll2;
	}
	if(clocks >= 2)
	{
		nv_card->PMC[0x4038/4] = nv_card->mpll;
		nv_card->PMC[0x403c/4] = nv_card->mpll2;
	}
	if(clocks >= 1)
	{
		nv_card->PMC[0x4020/4] = nv_card->mpll;
		nv_card->PMC[0x4024/4] = nv_card->mpll2;
	}
}
/* Store the requested state and hook up the matching clock functions.
/  When NVControl support is compiled in and the state contains STATE_2D or
/  STATE_3D the nvcontrol_* functions are used; in all other cases the
/  low-level nv40_* functions from this file are installed.
*/
static void nv40_set_state(int state)
{
	nv_card->state = state;

#ifdef HAVE_NVCONTROL
	if(state & (STATE_2D | STATE_3D))
	{
		nv_card->get_gpu_speed = nvcontrol_get_gpu_speed;
		nv_card->set_gpu_speed = nvcontrol_set_gpu_speed;
		nv_card->reset_gpu_speed = nvcontrol_reset_gpu_speed;
		nv_card->get_memory_speed = nvcontrol_get_memory_speed;
		nv_card->set_memory_speed = nvcontrol_set_memory_speed;
		nv_card->reset_memory_speed = nvcontrol_reset_memory_speed;
		return;
	}
#endif

	/* Low-level access through the PLL registers */
	nv_card->get_gpu_speed = nv40_get_gpu_speed;
	nv_card->set_gpu_speed = nv40_set_gpu_speed;
	nv_card->reset_gpu_speed = nv40_reset_gpu_speed;
	nv_card->get_memory_speed = nv40_get_memory_speed;
	nv_card->set_memory_speed = nv40_set_memory_speed;
	nv_card->reset_memory_speed = nv40_reset_memory_speed;
}
/* Initialize nv_card for NV4x hardware.
/  This sets the base frequency, installs the clock/pipeline function
/  pointers, detects the optional capabilities (pipeline modding,
/  Smartdimmer, internal temperature sensor, fanspeed control), decides
/  whether overclocking is offered and fills in the allowed clock range.
*/
void nv40_init(void)
{
nv_card->base_freq = 27000;
nv_card->set_state = nv40_set_state;
nv_card->set_state(nv_card->state); /* Set the clock function pointers */
/* Functions to query the pipeline configuration; these are available on all cards handled here */
nv_card->get_default_mask = nv40_get_default_mask;
nv_card->get_hw_masked_units = nv40_get_hw_masked_units;
nv_card->get_sw_masked_units = nv40_get_sw_masked_units;
nv_card->get_pixel_pipelines = nv40_get_pixel_pipelines;
nv_card->get_vertex_pipelines = nv40_get_vertex_pipelines;
/* For now enable modding on NV40 cards and NV43 revisions prior to A4; other cards are locked */
if((nv_card->arch & NV40) || ((nv_card->arch & NV43) && (nv_card->get_gpu_revision() < 0xA4)))
{
nv_card->caps |= PIPELINE_MODDING;
nv_card->set_pixel_pipelines = nv40_set_pixel_pipelines;
nv_card->set_vertex_pipelines = nv40_set_vertex_pipelines;
}
/* For now support Smartdimmer on 6200Go(0x160)/7600Go(0x390) laptops */
if((((nv_card->device_id & 0xff0) == 0x160) || ((nv_card->device_id & 0xff0) == 0x390)) && nv_card->gpu == MOBILE)
{
nv_card->caps |= SMARTDIMMER;
nv_card->mobile_get_smartdimmer = nv44_mobile_get_smartdimmer;
nv_card->mobile_set_smartdimmer = nv44_mobile_set_smartdimmer;
}
/* Temperature monitoring; all cards after the NV40 feature an internal temperature sensor.
/ Only it is disabled on most 6200/6600(GT) cards but we can re-enable it ;)
/ The internal sensor is only used when temperature monitoring wasn't set up already.
*/
if((nv_card->arch & (NV43 | NV44 | NV46 | NV47 | NV49 | NV4B)) && !(nv_card->caps & GPU_TEMP_MONITORING))
{
nv_card->caps |= GPU_TEMP_MONITORING;
nv_card->sensor_name = (char*)strdup("GPU Internal Sensor");
nv_card->get_gpu_temp = (int(*)(I2CDevPtr))nv43_get_gpu_temp;
}
/* Fanspeed monitoring; bit 31 is an indication if fanspeed monitoring is available
/ Note this bit isn't very reliable as it is set on cards with advanced sensors too.
/ Should the NV44 use the NV43 codepath?
/ NV40/NV49 use register 0x10f0, NV41/NV43/NV44/NV47 use 0x15f4; both are skipped when
/ I2C based fanspeed monitoring is present.
*/
if(((nv_card->arch & (NV40 | NV49)) && (nv_card->PMC[0x10f0/4] & 0x80000000)) && !(nv_card->caps & I2C_FANSPEED_MONITORING))
{
nv_card->caps |= GPU_FANSPEED_MONITORING;
nv_card->get_fanspeed = nv40_get_fanspeed;
nv_card->set_fanspeed = nv40_set_fanspeed;
}
else if(((nv_card->arch & (NV41 | NV43 | NV44 | NV47)) && (nv_card->PMC[0x15f4/4] & 0x80000000)) && !(nv_card->caps & I2C_FANSPEED_MONITORING))
{
nv_card->caps |= GPU_FANSPEED_MONITORING;
nv_card->get_fanspeed = nv43_get_fanspeed;
nv_card->set_fanspeed = nv43_set_fanspeed;
}
/* Mobile GPU check; we don't want to overclock those unless the user wants it */
if(nv_card->gpu == MOBILE)
{
/* Clear both overclocking bits and leave the other capabilities alone */
nv_card->caps = ~(~nv_card->caps | GPU_OVERCLOCKING | MEM_OVERCLOCKING);
}
else
nv_card->caps |= (GPU_OVERCLOCKING | MEM_OVERCLOCKING);
/* Set the speed range */
if(nv_card->bios)
{
/* Most Geforce6 bioses just have one active entry but some Geforce6 6800(Ultra) bioses have 2 entries
/ in that case the first one contains the highest clocks (3d?). Further there are 6600GT cards with
/ also two entries for which the second entry contains the 3d clock.
*/
if((nv_card->bios->perf_entries == 1) || (nv_card->bios->perf_lst[0].nvclk > nv_card->bios->perf_lst[1].nvclk))
{
nv_card->memclk_3d = (short)nv_card->bios->perf_lst[0].memclk;
nv_card->nvclk_3d = (short)nv_card->bios->perf_lst[0].nvclk;
}
else
{
/* 6600GT cards have 2d/3d clocks again; the second entries are the 3d ones.
/ We use the 2d entries for the minimum clocks and the 3d ones for the maximum ones.
*/
nv_card->memclk_3d = (short)nv_card->bios->perf_lst[1].memclk;
nv_card->nvclk_3d = (short)nv_card->bios->perf_lst[1].nvclk;
}
/* The range runs from 75% of the first bios entry up to 125% of the 3d clocks */
nv_card->memclk_min = (short)(nv_card->bios->perf_lst[0].memclk * .75);
nv_card->memclk_max = nv_card->memclk_3d * 1.25;
nv_card->nvclk_min = (short)(nv_card->bios->perf_lst[0].nvclk * .75);
nv_card->nvclk_max = nv_card->nvclk_3d * 1.25;
/* FIXME: Divide the memory clocks by two on Geforce 7600/7900 cards because we program the 'real' clocks for those instead of the effective DDR ones which are twice as high */
if(nv_card->arch & (NV49 | NV4B))
{
nv_card->memclk_min /= 2;
nv_card->memclk_max /= 2;
}
}
else
{
/* No bios dump: derive the range from the clocks decoded from the PLL values kept in nv_card */
float memclk = GetClock_nv40(nv_card->base_freq, nv_card->mpll, nv_card->mpll2);
float nvclk = GetClock_nv40(nv_card->base_freq, nv_card->nvpll, nv_card->nvpll2);
/* Not great but better than nothing .. */
nv_card->memclk_min = (short)(memclk * .75);
nv_card->memclk_max = (short)(memclk * 1.5);
nv_card->nvclk_min = (short)(nvclk * .75);
nv_card->nvclk_max = (short)(nvclk * 1.5);
}
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */